Example No. 1
def approximation(limit):
    detect = 0
    MODEL_NAME = 'obj_detection_tflite'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = 0.6
    imW, imH = 1280, 720

    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter

    else:
        from tensorflow.lite.python.interpreter import Interpreter

    CWD_PATH = os.getcwd()

    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    if labels[0] == '???':
        del labels[0]

    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    pi_camera = PiCamera(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    p_height = 0
    p_width = 0
    detections = 0
    approximation_detected = False
    timer_mark = timer_start = time.time()
    while timer_mark - timer_start < limit:
        print(timer_mark - timer_start)
        frame1 = pi_camera.read()

        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects

        for i in range(len(scores)):

            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                y_min = int(max(1, (boxes[i][0] * imH)))
                x_min = int(max(1, (boxes[i][1] * imW)))
                y_max = int(min(imH, (boxes[i][2] * imH)))
                x_max = int(min(imW, (boxes[i][3] * imW)))
                object_name = labels[int(classes[i])]

                if object_name == 'car' or object_name == 'bus' or object_name == 'truck':
                    detections += 1
                    # Warn only after the first detection, when the box has grown by >15% in either dimension
                    if ((y_max - y_min) > p_height * 1.15 or (x_max - x_min) > p_width * 1.15) \
                            and detections > 1:
                        play_sound_notification("waiting")
                        limit += 3

                    p_height = y_max - y_min
                    p_width = x_max - x_min
        
        timer_mark = time.time()

    cv2.destroyAllWindows()
    pi_camera.stop()
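
The snippet above (like most of the examples on this page) omits its imports and two project-specific helpers, PiCamera and play_sound_notification. A minimal sketch of the scaffolding it assumes, using a threaded stand-in built on cv2.VideoCapture instead of the original Raspberry Pi camera wrapper (the class and stub below are assumptions for illustration, not the original code):

import importlib.util
import os
import threading
import time

import cv2
import numpy as np


class PiCamera:
    """Hypothetical stand-in for the threaded camera wrapper used above."""

    def __init__(self, resolution=(1280, 720), framerate=30):
        self.capture = cv2.VideoCapture(0)
        self.capture.set(cv2.CAP_PROP_FRAME_WIDTH, resolution[0])
        self.capture.set(cv2.CAP_PROP_FRAME_HEIGHT, resolution[1])
        self.capture.set(cv2.CAP_PROP_FPS, framerate)
        self.frame = None
        self.stopped = False

    def start(self):
        # Grab frames on a background thread so read() never blocks
        threading.Thread(target=self._update, daemon=True).start()
        return self

    def _update(self):
        while not self.stopped:
            _, self.frame = self.capture.read()

    def read(self):
        return self.frame

    def stop(self):
        self.stopped = True
        self.capture.release()


def play_sound_notification(name):
    """Hypothetical stub; the original presumably plays an audio cue."""
    print("notification:", name)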
Example No. 2
    # Make predictions and time them
    for i in range(N):
        print(f"On step {i}/{N}...")
        data_tmp = data[i * batch_size:(i + 1) * batch_size, :, :, :]
        #data_tmp = data_tmp[np.newaxis, :, :, :]
        print("Shape of data_tmp:", data_tmp.shape)

        data_tmp = np.array(data_tmp, dtype=np.float32)

        if i == 0:
            interpreter.resize_tensor_input(0, [data_tmp.shape[0], 5, 512, 1])
            interpreter.allocate_tensors()

        t0 = time.time()

        interpreter.set_tensor(input_details[0]['index'], data_tmp)

        interpreter.invoke()

        t = time.time() - t0
        if i != 0:
            times.append(t)

    print(times)

    mean = np.mean(times)
    std = np.std(times)

    times_mean.append(mean)
    times_std.append(std)
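
The interesting part of this benchmark is resize_tensor_input, which lets the TFLite interpreter accept a batch size different from the one baked into the model. A minimal, self-contained sketch of that pattern (the model path is a placeholder; some models reject a resized batch dimension):

import numpy as np
from tflite_runtime.interpreter import Interpreter  # or tensorflow.lite.python.interpreter

interpreter = Interpreter(model_path="model.tflite")  # placeholder path
input_details = interpreter.get_input_details()

batch_size = 8
new_shape = [batch_size] + list(input_details[0]['shape'][1:])

# Resize the input tensor to the new batch size, then re-allocate all buffers
interpreter.resize_tensor_input(input_details[0]['index'], new_shape)
interpreter.allocate_tensors()

# Re-read the details after re-allocation if the new shapes are needed
input_details = interpreter.get_input_details()

dummy = np.zeros(new_shape, dtype=input_details[0]['dtype'])
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()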
Example No. 3
def humancheck(self):
    human = False
    # Define a name for the snapshot using a running number
    img = "/home/pi/Desktop/snapshots/" + str(self.pic_name) + ".jpg"
    self.camera.capture(img)
    self.pic_name = self.pic_name + 1
    # Define and parse input arguments
    MODEL_NAME = 'Sample_TFLite_model'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = 0.5

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter

    else:
        from tensorflow.lite.python.interpreter import Interpreter

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    del labels[0]

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument

    interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(img)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (height, width))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[
        0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(
        output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(
        output_details[2]['index'])[0]  # Confidence of detected objects

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # Draw label
            object_name = labels[int(
                classes[i]
            )]  # Look up object name from "labels" array using class index
            if object_name == 'person':  # if the label is 'person', stop looking and flag a human
                human = True

                break
    if human:
        # img[:27] is the snapshots directory prefix ("/home/pi/Desktop/snapshots/"),
        # so the file is moved into its "humans/" subfolder
        shutil.move(img, img[:27] + "humans/" + img[27:])
        print('Human found!', scores[i] * 100, '%')

    else:
        print('No humans found')
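
The shutil.move call above relies on slicing the path at index 27, which is the length of the "/home/pi/Desktop/snapshots/" prefix. A path-independent way to build the same destination with the standard library:

import os
import shutil

def move_to_humans(img):
    # <snapshots dir>/humans/<file name>, without hard-coding the prefix length
    dest_dir = os.path.join(os.path.dirname(img), "humans")
    os.makedirs(dest_dir, exist_ok=True)
    shutil.move(img, os.path.join(dest_dir, os.path.basename(img)))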
Example No. 4
class TFInferenceEngine:
    """Thin wrapper around TFLite Interpreter.

    The official TFLite API is moving fast and still changes frequently.
    This class intends to abstract out underlying TF changes to some extent.

    It dynamically detects if EdgeTPU is available and uses it.
    Otherwise falls back to TFLite Runtime.
    """
    def __init__(self,
                 model=None,
                 labels=None,
                 confidence_threshold=0.8,
                 top_k=10):
        """Create an instance of Tensorflow inference engine.

        :Parameters:
        ----------
        model: dict
            {
                'tflite': path,
                'edgetpu': path,
            }
            Where path is of type string and points to the
            location of frozen graph file (AI model).
        labels : string
            Location of file with model labels.
        confidence_threshold : float
            Inference confidence threshold.
        top_k : type
            Inference top-k threshold.

        """
        assert model
        assert model['tflite'], 'TFLite AI model path required.'
        model_tflite = model['tflite']
        assert os.path.isfile(model_tflite), \
            'TFLite AI model file does not exist: {}' \
            .format(model_tflite)
        self._model_tflite_path = model_tflite
        model_edgetpu = model.get('edgetpu', None)
        if model_edgetpu:
            assert os.path.isfile(model_edgetpu), \
                'EdgeTPU AI model file does not exist: {}' \
                .format(model_edgetpu)
        self._model_edgetpu_path = model_edgetpu
        assert labels, 'AI model labels path required.'
        assert os.path.isfile(labels), \
            'AI model labels file does not exist: {}' \
            .format(labels)
        self._model_labels_path = labels
        self._confidence_threshold = confidence_threshold
        self._top_k = top_k
        log.debug(
            'Loading AI model:\n'
            'TFLite graph: %r\n'
            'EdgeTPU graph: %r\n'
            'Labels: %r\n'
            'Confidence threshold: %.0f%%\n'
            'top-k: %d', model_tflite, model_edgetpu, labels,
            confidence_threshold * 100, top_k)
        # EdgeTPU is not available in testing and other environments
        # load dynamically as needed
        #        edgetpu_class = 'DetectionEngine'
        #        module_object = import_module('edgetpu.detection.engine',
        #                                      package=edgetpu_class)
        #        target_class = getattr(module_object, edgetpu_class)
        self._tf_interpreter = _get_edgetpu_interpreter(model=model_edgetpu)
        if not self._tf_interpreter:
            log.debug('EdgeTPU not available. Will use TFLite CPU runtime.')
            self._tf_interpreter = Interpreter(model_path=model_tflite)
        assert self._tf_interpreter
        self._tf_interpreter.allocate_tensors()
        # check the type of the input tensor
        self._tf_input_details = self._tf_interpreter.get_input_details()
        self._tf_output_details = self._tf_interpreter.get_output_details()
        self._tf_is_quantized_model = \
            self.input_details[0]['dtype'] != np.float32

    @property
    def input_details(self):
        return self._tf_input_details

    @property
    def output_details(self):
        return self._tf_output_details

    @property
    def is_quantized(self):
        return self._tf_is_quantized_model

    @property
    def labels_path(self):
        """
        Location of labels file.

        :Returns:
        -------
        string
            Path to AI model labels.

        """
        return self._model_labels_path

    @property
    def confidence_threshold(self):
        """
        Inference confidence threshold.

        :Returns:
        -------
        float
            Confidence threshold for inference results.
            Only results at or above
            this threshold should be returned by each engine inference.

        """
        return self._confidence_threshold

    @property
    def top_k(self):
        """
        Inference top-k threshold.

        :Returns:
        -------
        int
            Max number of results to be returned by each inference.
            Ordered by confidence score.

        """
        return self._top_k

    def infer(self):
        """Invoke model inference on current input tensor."""
        return self._tf_interpreter.invoke()

    def set_tensor(self, index=None, tensor_data=None):
        """Set tensor data at given reference index."""
        assert isinstance(index, int)
        self._tf_interpreter.set_tensor(index, tensor_data)

    def get_tensor(self, index=None):
        """Return tensor data at given reference index."""
        assert isinstance(index, int)
        return self._tf_interpreter.get_tensor(index)
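
A sketch of how this wrapper might be driven, mirroring the set_tensor / infer / get_tensor cycle of the raw interpreter (the file names are placeholders, and _get_edgetpu_interpreter and log are assumed to be defined elsewhere in the module):

import numpy as np

engine = TFInferenceEngine(
    model={'tflite': 'detect.tflite', 'edgetpu': None},  # placeholder paths
    labels='labelmap.txt',
    confidence_threshold=0.8,
    top_k=10)

input_index = engine.input_details[0]['index']
_, height, width, _ = engine.input_details[0]['shape']

# Stand-in for a real RGB frame resized to the model's input shape
frame = np.zeros((1, height, width, 3), dtype=np.uint8)
if not engine.is_quantized:
    frame = (np.float32(frame) - 127.5) / 127.5

engine.set_tensor(input_index, frame)
engine.infer()
boxes = engine.get_tensor(engine.output_details[0]['index'])[0]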
Example No. 5
class objectRecog():
    def __init__(self):
        self.MODEL_NAME = 'objectRecog/model'
        self.GRAPH_NAME = 'detect.tflite'
        self.LABELMAP_NAME = 'labelmap.txt'
        self.min_conf_threshold = 0.5
        self.resW, self.resH = '1280x720'.split('x')
        self.imW, self.imH = int(self.resW), int(self.resH)
        self.use_TPU = False

        self.pkg = importlib.util.find_spec('tflite_runtime')
        if self.pkg:
            from tflite_runtime.interpreter import Interpreter
            if self.use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if self.use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        if self.use_TPU:
            if (self.GRAPH_NAME == 'detect.tflite'):
                self.GRAPH_NAME = 'edgetpu.tflite'

        CWD_PATH = os.getcwd()

        PATH_TO_CKPT = os.path.join(CWD_PATH, self.MODEL_NAME, self.GRAPH_NAME)

        PATH_TO_LABELS = os.path.join(CWD_PATH, self.MODEL_NAME,
                                      self.LABELMAP_NAME)

        print(PATH_TO_LABELS)

        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        if self.labels[0] == '???':
            del self.labels[0]

        if self.use_TPU:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)

        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

    def detObjects(self, frame1):
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        if self.floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects
        detectedObjects = []
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * self.imH)))
                xmin = int(max(1, (boxes[i][1] * self.imW)))
                ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                xmax = int(min(self.imW, (boxes[i][3] * self.imW)))

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)

                # Draw label
                object_name = self.labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
                detectedObjects.append(object_name)
        return detectedObjects, frame
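
A short sketch of feeding this class from a webcam loop with OpenCV (the surrounding project presumably uses a Pi camera stream; cv2.VideoCapture here is an assumption for illustration):

import cv2

recog = objectRecog()            # loads objectRecog/model/detect.tflite relative to the CWD
cap = cv2.VideoCapture(0)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    names, annotated = recog.detObjects(frame)
    cv2.imshow('detections', annotated)
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()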
Example No. 6
class SsdMobileNet:
    def __init__(self, frozenGraphFilename, device='cpu'):
        self.boxes = None
        self.scores = None
        self.classes = None
        self.num_detections = None
        #self.interpreter = tf.lite.Interpreter(frozenGraphFilename)
        self.interpreter = Interpreter(frozenGraphFilename)
        self.__load_graph(device)
        self.__init_predictor()

    def __load_graph(self, device):

        # TFLite interpreter configuration
        # tf.logging.set_verbosity(tf.logging.DEBUG)
        self.interpreter.allocate_tensors()

    def __init_predictor(self):
        # obtaining the input-output shapes and types
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def predict(self,
                images,
                params,
                thresh=0,
                batch=1,
                warmup=0,
                iterations=1):
        # Warmup
        x_matrix = np.array(images)
        if params[const.PRECISION] == const.FP32:
            x_matrix = np.array(images, dtype=np.float32)

        self.interpreter.set_tensor(self.input_details[0]['index'], x_matrix)
        for i in range(warmup):
            self.interpreter.invoke()
            matrix_0 = self.interpreter.get_tensor(
                self.output_details[0]['index'])
            matrix_1 = self.interpreter.get_tensor(
                self.output_details[1]['index'])
            matrix_2 = self.interpreter.get_tensor(
                self.output_details[2]['index'])
            matrix_3 = self.interpreter.get_tensor(
                self.output_details[3]['index'])

        # Measure
        times = []
        for i in range(iterations):
            t0 = time.time()
            self.interpreter.invoke()
            boxes = self.interpreter.get_tensor(
                self.output_details[0]['index'])
            classes = self.interpreter.get_tensor(
                self.output_details[1]['index'])
            scores = self.interpreter.get_tensor(
                self.output_details[2]['index'])
            num_detections = self.interpreter.get_tensor(
                self.output_details[3]['index'])

            t1 = time.time()
            ts = t1 - t0
            times.append(ts)
        # Report
        results = {"seconds": times, "predictions": []}

        for i in range(len(num_detections)):
            thisResult = []
            for d in range(int(num_detections[i])):
                # Note the weird bbox coords: y1, x1, y2, x2 !!
                box = boxes[i][d].tolist()
                x = {
                    'score': scores[i][d].tolist(),
                    'box': [box[1], box[0], box[3], box[2]],
                    'class': (classes[i][d] + 1).tolist()
                }
                thisResult.append(x)
            results['predictions'].append(thisResult)
        return results

    def predict_runtime(self, images, params, max=5):
        x_matrix = np.array(images)
        if params[const.PRECISION] == const.FP32:
            x_matrix = np.array(images, dtype=np.float32)

        # Feed the input tensor, then time a single inference
        self.interpreter.set_tensor(self.input_details[0]['index'], x_matrix)
        start = time.time()
        self.interpreter.invoke()
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num_detections = self.interpreter.get_tensor(
            self.output_details[3]['index'])
        end = time.time() - start
        return end
Example No. 7
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        self._mirror = mirror

        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path, experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()
        self._input_tensor_shape = self._interpreter.get_input_details()[0]['shape']
        self._input_details = self._interpreter.get_input_details()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self._input_tensor_shape


        # Auto-detect stride size
        def calcStride(h, w, L):
            return int((2 * h * w) / (math.sqrt(h**2 + 4 * h * L * w - 2 * h * w + w**2) - h - w))
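        # calcStride inverts heatmap_size = (w // stride + 1, h // stride + 1): with L heatmap
        # elements (nbytes equals the element count for the 1-byte quantized tensor),
        # (h/s + 1) * (w/s + 1) = L, and solving that quadratic for s gives the closed form above.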

        details = self._interpreter.get_output_details()[4]
        self.heatmap_zero_point = details['quantization_parameters']['zero_points'][0]
        self.heatmap_scale = details['quantization_parameters']['scales'][0]
        heatmap_size = self._interpreter.tensor(details['index'])().nbytes
        self.stride = calcStride(self.image_height, self.image_width, heatmap_size)
        self.heatmap_size = (self.image_width // self.stride + 1, self.image_height // self.stride + 1)
        details = self._interpreter.get_output_details()[5]
        self.parts_zero_point = details['quantization_parameters']['zero_points'][0]
        self.parts_scale = details['quantization_parameters']['scales'][0]

        print("Heatmap size: ", self.heatmap_size)
        print("Stride: ", self.stride, self.heatmap_size)


    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

           For ideal results make sure the image fed to this function is close to the
           expected input size - it is the caller's responsibility to resize the
           image accordingly.

        Args:
          img: numpy array containing image
        """

        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            pads = [[0, max(0, self.image_height - img.shape[0])],
                    [0, max(0, self.image_width - img.shape[1])], [0, 0]]
            img = np.pad(img, pads, mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert (img.shape == tuple(self._input_tensor_shape[1:]))

        # Run the inference (API expects the data to be flattened)
        inference_time, outputs = self.run_inference(img)
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return inference_time, poses, heatmap, bodyparts

    def ParseOutputs(self, outputs):
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return poses, heatmap, bodyparts

    def _parse_poses(self, outputs):
        keypoints = outputs[0].reshape(-1, len(KEYPOINTS), 2)
        keypoint_scores = outputs[1].reshape(-1, len(KEYPOINTS))
        pose_scores = outputs[2].flatten()
        nposes = int(outputs[3][0])

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nposes):
            keypoint_dict = {}
            for point_i, point in enumerate(keypoints[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    keypoint_scores[pose_i, point_i])
                if self._mirror: keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_scores[pose_i]))

        return poses

    def softmax(self, y, axis):
        y = y - np.expand_dims(np.max(y, axis = axis), axis)
        y = np.exp(y)
        return y / np.expand_dims(np.sum(y, axis = axis), axis)

    def _parse_heatmaps(self, outputs):
        # Heatmaps are really float32.
        heatmap = (outputs[4].astype(np.float32) - self.heatmap_zero_point) * self.heatmap_scale
        heatmap = np.reshape(heatmap, [self.heatmap_size[1], self.heatmap_size[0]])
        part_heatmap = (outputs[5].astype(np.float32) - self.parts_zero_point) * self.parts_scale
        part_heatmap = np.reshape(part_heatmap, [self.heatmap_size[1], self.heatmap_size[0], -1])
        part_heatmap = self.softmax(part_heatmap, axis=2)
        return heatmap, part_heatmap

    def run_inference(self, input):
        start_time = time.monotonic()
        self._interpreter.set_tensor(self._input_details[0]['index'], np.expand_dims(input, axis=0))
        self._interpreter.invoke()
        duration_ms = (time.monotonic() - start_time) * 1000

        output = []
        for details in self._interpreter.get_output_details():
            tensor = self._interpreter.get_tensor(details['index'])
            output.append(tensor)

        return (duration_ms, output)
Example No. 8
    def startStream(self, modeldir, graph, labels, threshold, resolution,
                    edgetpu):
        MODEL_NAME = modeldir
        GRAPH_NAME = graph
        LABELMAP_NAME = labels
        min_conf_threshold = float(threshold)
        resW, resH = resolution.split('x')
        imW, imH = int(resW), int(resH)
        use_TPU = edgetpu

        # Import TensorFlow libraries
        # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
        # If using Coral Edge TPU, import the load_delegate library
        pkg = importlib.util.find_spec('tflite_runtime')
        if pkg:
            from tflite_runtime.interpreter import Interpreter
            if use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        # If using Edge TPU, assign filename for Edge TPU model
        if use_TPU:
            # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
            if (GRAPH_NAME == 'detect.tflite'):
                GRAPH_NAME = 'edgetpu.tflite'

        # Get path to current working directory
        CWD_PATH = os.getcwd()

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if labels[0] == '???':
            del labels[0]

        # Load the Tensorflow Lite model.
        # If using Edge TPU, use special load_delegate argument
        if use_TPU:
            interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            interpreter = Interpreter(model_path=PATH_TO_CKPT)

        interpreter.allocate_tensors()

        # Get model details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]

        floating_model = (input_details[0]['dtype'] == np.float32)

        input_mean = 127.5
        input_std = 127.5

        # Initialize frame rate calculation
        frame_rate_calc = 1
        freq = cv2.getTickFrequency()

        # Initialize video stream
        videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
        time.sleep(1)

        # Create window
        cv2.namedWindow('Object detector', cv2.WINDOW_NORMAL)

        #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
        while True:

            # Start timer (for calculating frame rate)
            t1 = cv2.getTickCount()

            # Grab frame from video stream
            frame1 = videostream.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (width, height))
            input_data = np.expand_dims(frame_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            # Perform the actual detection by running the model with the image as input
            interpreter.set_tensor(input_details[0]['index'], input_data)
            interpreter.invoke()

            # Retrieve detection results
            boxes = interpreter.get_tensor(output_details[0]['index'])[
                0]  # Bounding box coordinates of detected objects
            classes = interpreter.get_tensor(output_details[1]['index'])[
                0]  # Class index of detected objects
            scores = interpreter.get_tensor(output_details[2]['index'])[
                0]  # Confidence of detected objects
            #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)
                        and (labels[int(classes[i])] == 'person')):

                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * imH)))
                    xmin = int(max(1, (boxes[i][1] * imW)))
                    ymax = int(min(imH, (boxes[i][2] * imH)))
                    xmax = int(min(imW, (boxes[i][3] * imW)))

                    # print(self.detect)

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (10, 255, 0), 2)

                    # Draw label
                    object_name = labels[int(
                        classes[i]
                    )]  # Look up object name from "labels" array using class index
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                          )  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        2)  # Get font size
                    label_ymin = max(
                        ymin, labelSize[1] + 10
                    )  # Make sure not to draw label too close to top of window
                    cv2.rectangle(
                        frame, (xmin, label_ymin - labelSize[1] - 10),
                        (xmin + labelSize[0], label_ymin + baseLine - 10),
                        (255, 255, 255),
                        cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                                2)  # Draw label text

                    # Draw circle in center
                    xcenter = xmin + (int(round((xmax - xmin) / 2)))
                    ycenter = ymin + (int(round((ymax - ymin) / 2)))
                    self.detect = setDetect(xcenter, ycenter, imH, imW)

                    cv2.circle(frame, (xcenter, ycenter),
                               5, (0, 0, 255),
                               thickness=-1)

                    # Print info
                    # print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')')

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc),
                        (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0),
                        2, cv2.LINE_AA)
            # All the results have been drawn on the frame, so it's time to display it.
            cv2.imshow('Object detector', frame)

            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break

        # Clean up
        cv2.destroyAllWindows()
        videostream.stop()
Example No. 9
class camera_interface():
    """
    The main interface for using the camera and determining the grip we need to be in.
    https://www.hackster.io/gatoninja236/scan-qr-codes-in-real-time-with-raspberry-pi-a5268b
      
    Attributes:
        count (int): Count of saved screenshots. File titles are frame'count'.jpg.
        vs (VideoStream): The threaded video stream object.
        interpreter (tflite Interpreter): The TFLite object detection interpreter.
    """

    def __init__(self,resolution=(640,480),framerate=30):
        self.count = 0
        # self.cap = cv2.VideoCapture(0)
        self.vs = VideoStream(resolution=(1280,720),framerate=30).start()
        # self.stream = cv2.VideoCapture(0)
        # ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        # ret = self.stream.set(3,resolution[0])
        # ret = self.stream.set(4,resolution[1])

        # Wait one second for the camera to start up
        time.sleep(1)
        print("[INFO] Created video capture object")
        print("[INFO] loading model...")

        #Load the tflite model and labelmap
        # Get path to current working directory
        GRAPH_NAME = "detect.tflite"
        MODEL_NAME = "Camera_Interpreter/Coco"
        LABELMAP_NAME = "labelmap.txt"
        CWD_PATH = os.getcwd()

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        # Load the Tensorflow Lite model.
        # If using Edge TPU, use special load_delegate argument
        use_TPU = False
        if use_TPU:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                    experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)

        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5
        
        # QR code detection object
        # self.detector = cv2.QRCodeDetector()
        self.cam_data = ""
        self.object_spotted = False
        self.test_count = 0
        self.killed_thread = False
        self.cam_image = None
        self.cam_image_index = 0
        self.object_spotted_T0 = 0
        self.object_not_spotted_delta_req = 3

        #Initialize the paused flag to false
        self.temp_pause = False

    def camera_read_threader(self):
        #Start the read cam thread
        read_cam = threading.Thread(target=self.read_cam_thread, args=())
        read_cam.start()
        while(self.cam_image_index == 0):
            time.sleep(0.05)
        #Start the image decode thread
        decoder = threading.Thread(target=self.decode_image_thread, args=())
        decoder.start()
        while not self.killed_thread and read_cam.is_alive() and decoder.is_alive():
            time.sleep(0.25)
        #Flag is thrown or error, so ensure flag is thrown and wait for threads to join
        self.killed_thread = True
        read_cam.join()
        decoder.join()

    def decode_image_thread(self):
        previous_index = None
        while not self.killed_thread:
            #Detect and decode the stored image if it's ready
            # t = time.time()
            if(previous_index != self.cam_image_index and (not self.temp_pause)):
                previous_index = self.cam_image_index
                # data, _, _ = self.detector.detectAndDecode(self.cam_image) Deprecated QR Code reader
                data, score = self.detect_main_object(self.cam_image)
                # print("[INFO] Camera objects: " + data)
                # if(data not in grips._value2member_map_):
                #     data = grips.openGrip.value

                #If the camera sees an object, skip the time requirement
                if(data != ""):
                    self.cam_data = data
                    self.object_spotted_T0 = time.time()
                    self.object_spotted = True
                #If the camera doesn't see an object, require a delay before reporting nothing
                else:
                    if((time.time() - self.object_spotted_T0) > self.object_not_spotted_delta_req):
                        # print("[DEBUG] Delta Req passed; reporting no object now")
                        self.cam_data = data
                        self.object_spotted = False
                
                #####No sleep since detecting/decoding takes significant time, just do it as fast as possible
            # print("[INFO] Time to decode image: " + (str(time.time() - t)))
            time.sleep(0.01)

    def detect_main_object(self, frame1):
        min_conf_threshold = 0.35

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'],input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        # boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0] # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Confidence of detected objects

        highest_scoring_label = ""
        highest_score = 0
        for i in range(len(scores)):
            object_name = self.labels[int(classes[i])] # Look up object name from "labels" array using class index
            if((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and (scores[i] > highest_score) and (object_name in grips._value2member_map_)):
                # Draw label
                highest_scoring_label = object_name
                highest_score = scores[i]

        return (highest_scoring_label, highest_score)

    def read_cam_thread(self):
        while not self.killed_thread:
            if(not self.temp_pause):
                # t = time.time()
                #Get camera image, rescale, and store in class variable
                frame = self.vs.read()
                self.cam_image = imutils.resize(frame, width=400)
                
                #Increase index by 1
                self.cam_image_index += 1
                # Pause briefly between frame grabs
                time.sleep(0.2)
                # print("Time to save/resize new image: " + (str(time.time() - t)))

    # def read_cam(self):
    #     # get the image
    #     _, img = self.cap.read() #TODO: #14 Downscale the resolution for faster processing
    #     # get bounding box coords and data
    #     data, bbox, _ = self.detector.detectAndDecode(img)
    #     #Define a parameter we can easily read later if anything is detected
    #     is_object = False
    #     #Update parameter/output the data we found, if any
    #     if data:
    #         #print("data found: ", data)
    #         is_object = True
    #     #return the information we got from the camera
    #     # cv2.imwrite("frame1.jpg", img)     # save frame as JPEG file
    #     return data, bbox, img, is_object

    # def read_cam_display_out(self):
    #     #Call the standard method to get the qr data / bounding box
    #     data, bbox, img, _ = self.read_cam()
    #     # if there is a bounding box, draw one, along with the data
    #     if(bbox is not None):
    #         for i in range(len(bbox)):
    #             cv2.line(img, tuple(bbox[i][0]), tuple(bbox[(i+1) % len(bbox)][0]), color=(255,
    #                     0, 255), thickness=2)
    #         cv2.putText(img, data, (int(bbox[0][0][0]), int(bbox[0][0][1]) - 10), cv2.FONT_HERSHEY_SIMPLEX,
    #                     0.5, (0, 255, 0), 2)
    #         #if data:
    #             #print("data found: ", data)
    #     # display the image preview
    #     cv2.imshow("code detector", img)

    #     # save the image
    #     cv2.imwrite("frame1.jpg", img)     # save frame as JPEG file
    #     #self.count += 1

    def end_camera_session(self):
        #Stop the camera thread 
        self.killed_thread = True
        time.sleep(0.1)
        #Release the camera object
        self.vs.stop()
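
A sketch of driving this interface from a caller: camera_read_threader owns the read and decode threads, so the caller typically runs it in its own thread and polls cam_data / object_spotted (VideoStream, imutils and the grips enum are assumed to be imported and defined elsewhere in the project):

import threading
import time

cam = camera_interface()
worker = threading.Thread(target=cam.camera_read_threader, daemon=True)
worker.start()

try:
    for _ in range(20):           # poll the latest detection for a few seconds
        print("object:", cam.cam_data or "<none>", "spotted:", cam.object_spotted)
        time.sleep(0.5)
finally:
    cam.end_camera_session()      # sets killed_thread so the worker threads exit
    worker.join()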
Example No. 10
class DetectorTFLite:
    def __init__(self,
                 path_to_checkpoint,
                 path_to_labelmap,
                 filter_labels=None):
        self.filter_labels = filter_labels

        with open(path_to_labelmap, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        self.interpreter = Interpreter(model_path=path_to_checkpoint)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.tf_height = self.input_details[0]['shape'][1]
        self.tf_width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

    def ExtractBoxes(self, imH, imW, boxes, classes, scores):
        det_boxes = []
        for i in range(len(scores)):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            miny = int(max(1, (boxes[i][0] * imH)))
            minx = int(max(1, (boxes[i][1] * imW)))
            maxy = int(min(imH, (boxes[i][2] * imH)))
            maxx = int(min(imW, (boxes[i][3] * imW)))
            label = self.labels[int(classes[i])]
            det_boxes.append((minx, miny, maxx, maxy, label, float(scores[i])))
        return det_boxes

    def DetectFromImage(self, img):
        imH, imW, _ = img.shape
        # Acquire frame and resize to expected shape [1xHxWx3]
        frame_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.tf_width, self.tf_height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects

        return self.ExtractBoxes(imH, imW, boxes, classes, scores)

    def DisplayDetection(self, image, box, det_time=None):
        img = image.copy()

        x_min = box[0]
        y_min = box[1]
        x_max = box[2]
        y_max = box[3]
        cls = str(box[4])
        score = str(np.round(box[-1], 2))

        text = cls + ": " + score
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
        # White background strip behind the label text
        cv2.rectangle(img, (x_min, y_min - 20), (x_max, y_min),
                      (255, 255, 255), -1)
        cv2.putText(img, text, (x_min + 5, y_min - 7),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        if det_time is not None:
            fps = round(1000. / det_time, 1)
            fps_txt = str(fps) + " FPS"
            cv2.putText(img, fps_txt, (25, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                        (0, 0, 0), 2)

        return img
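
A sketch of single-image usage, timing DetectFromImage and drawing each confident box with DisplayDetection (file names are placeholders):

import time
import cv2

detector = DetectorTFLite('detect.tflite', 'labelmap.txt')  # placeholder paths

img = cv2.imread('test.jpg')
t0 = time.time()
detections = detector.DetectFromImage(img)
det_time_ms = (time.time() - t0) * 1000

for box in detections:                     # (xmin, ymin, xmax, ymax, label, score)
    if box[5] > 0.5:
        img = detector.DisplayDetection(img, box, det_time=det_time_ms)

cv2.imwrite('test_out.jpg', img)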
Example No. 11
class YOLOV5:
    def __init__(self, wanted_labels=None, model_file=None, label_file=None, num_threads=None, edgetpu=False, libedgetpu=None, score_threshold=0.25):
        basedir = os.getenv('DEEPDISHHOME','.')
        if model_file is None:
          model_file = os.path.join(basedir, 'detectors/yolov5/yolov5s-int8.tflite')
        if label_file is None:
          label_file = os.path.join(basedir, 'detectors/yolov5/coco_classes.txt')
        self.cfg_file = os.path.join(basedir, 'detectors/yolov5/yolov5s.yaml')
        if wanted_labels is None:
          wanted_labels = ['person']
        self.wanted_labels = wanted_labels
        self.label_file = label_file
        self.score_threshold = score_threshold
        self.labels = self._get_labels()
        self.use_edgetpu = edgetpu
        self.int8 = False

        if 'saved_model' in model_file:
            self.mode = 'saved_model'
            if 'keras' not in sys.modules:
                print('yolov5: saved_model mode requires keras')
                sys.exit(1)
        elif '.tflite' in model_file:
            self.mode = 'tflite'
            if 'int8' in model_file: self.int8 = True
        else:
            print('unable to determine format of yolov5 model')
            sys.exit(1)

        if libedgetpu is None:
            libedgetpu = edgetpu_lib_name()

        if self.mode == 'tflite':
            # Load TFLite model and allocate tensors.
            self.interpreter = Interpreter(
                model_path=model_file,
                num_threads=num_threads,
                experimental_delegates=[load_delegate(libedgetpu)] if self.use_edgetpu else None)
            self.interpreter.allocate_tensors()
            self.num_threads = num_threads
            # Get input and output tensors.
            self.input_details = self.interpreter.get_input_details()
            self.output_details = self.interpreter.get_output_details()
            _, self.height, self.width, _ = self.input_details[0]['shape'].tolist()
        elif self.mode == 'saved_model':
            self.model = keras.models.load_model(model_file)
            self.num_threads = 1
            _, self.height, self.width, _ = self.model.inputs[0].shape.as_list()

        yaml_file = Path(self.cfg_file)
        with open(yaml_file) as f:
            cfg = yaml.load(f, Loader=yaml.FullLoader)
        self.anchors = cfg['anchors']

    def _get_labels(self):
        labels_path = os.path.expanduser(self.label_file)
        with open(labels_path) as f:
            labels = {i: line.strip() for i, line in enumerate(f.readlines())}
        return labels

    def detect_image(self, img):
        img_size = img.size
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        img_resized = img.convert('RGB').resize((self.width, self.height), Image.LANCZOS)
        input_data = np.expand_dims(img_resized, 0).astype(np.float32)

        if self.int8:
            scale, zero_point = self.input_details[0]['quantization']
            input_data = (input_data / scale + zero_point).astype(np.uint8)

        if self.mode == 'tflite':
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()
            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
            raw = np.copy(output_data)
        elif self.mode == 'saved_model':
            input_data /= 255.0
            output_data = self.model(input_data).numpy()

        if self.int8:
            scale, zero_point = self.output_details[0]['quantization']
            output_data = output_data.astype(np.float32)
            output_data = (output_data - zero_point) * scale

        x = np.copy(output_data)
        boxes = np.copy(x[..., :4])
        boxes[..., 0] = x[..., 0] - x[..., 2] / 2
        boxes[..., 1] = x[..., 1] - x[..., 3] / 2
        boxes[..., 2] = x[..., 0] + x[..., 2] / 2
        boxes[..., 3] = x[..., 1] + x[..., 3] / 2
        x[..., 5:] *= x[..., 4:5]
        best_classes = np.expand_dims(np.argmax(x[..., 5:], axis=-1), axis=-1)
        confidences = np.take_along_axis(x, best_classes + 5, axis=-1)
        y = np.concatenate((boxes, confidences, best_classes.astype(np.float32)), axis=-1)
        y = y[np.where(y[..., 4] >= self.score_threshold)]
        y[...,:4] *= np.array([img_size[0], img_size[1], img_size[0], img_size[1]])

        return_boxs = []
        return_lbls = []
        return_scrs = []

        for *xyxy, score, labelidx in y:
            label=self.labels[int(labelidx)]
            if label in self.wanted_labels and score >= self.score_threshold:
                tlwh = np.copy(xyxy)
                tlwh[2] = xyxy[2] - xyxy[0]
                tlwh[3] = xyxy[3] - xyxy[1]
                return_boxs.append(list(tlwh))
                return_lbls.append(label)
                return_scrs.append(score)
        return (return_boxs, return_lbls, return_scrs)
Example No. 12
    t1 = cv2.getTickCount()
    # Grab frame from video stream
    frame1 = videostream.read()
    # Acquire frame and resize to expected shape [1xHxWx3]
    frame = frame1.copy()
    # Letterbox the 480x640 frame into a 640x640 square (80 px of black above and below)
    cut_img = np.zeros((640, 640, 3), np.uint8)
    cut_img[80:560] = frame
    frame_resized = cv2.resize(cut_img, (width, height), interpolation=cv2.INTER_AREA)

    st1 = time.time()
    if EDGE_TPU:
        input_data = np.expand_dims(frame_resized, axis=0)
    else:
        input_data = np.expand_dims(frame_resized / 127.5 - 1, axis=0).astype(np.float32)

    face_interpreter.set_tensor(face_input_details['index'], input_data)
    face_interpreter.invoke()
    raw_box = face_interpreter.get_tensor(face_output_details[0]['index'])[0]
    raw_score = face_interpreter.get_tensor(face_output_details[1]['index'])[0]
    st2 = time.time()

    # 3. Postprocess the raw predictions:
    detections = _tensors_to_detections(raw_box, raw_score, anchors)

    # 4. Non-maximum suppression to remove overlapping detections:
    faces = _weighted_non_max_suppression(detections)
    print('Inference time: ', (st2 - st1) * 1000, ' Post-processing: ', (time.time() - st2) * 1000)

    for rc in faces:
        cv2.rectangle(frame, (int(rc[1]*imW), int(rc[0]*imW - 80)), (int(rc[3]*imW), int(rc[2]*imW - 80)), (0, 255, 0), 2)
        for i in range(6):
Example No. 13
class Detection:
    def __init__(self):
        self.MODEL_NAME = "detect"
        self.GRAPH_NAME = "detect.tflite"
        self.LABELMAP_NAME = "label_map.txt"
        self.min_conf_threshold = 0.70
        self.resW, self.resH = (1280, 720)
        self.imW, self.imH = int(self.resW), int(self.resH)
        # self.use_TPU = (True if 'projects' in str(os.getcwd()) else False)
        self.use_TPU = False
        self.frame_rate_calc = None
        self.item_detected = False
        self.latest_item = None

        self.detection_counter = [
            {"name": name, "counter": 0}
            for name in ("apple", "aubergine", "banana", "broccoli",
                         "cucumber", "orange", "paprika", "pear")
        ]

        # Import TFLite requirements
        self.pkg = importlib.util.find_spec('tflite_runtime')
        if self.pkg:
            from tflite_runtime.interpreter import Interpreter
            if self.use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if self.use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        # If using Edge TPU, assign filename for Edge TPU model
        if self.use_TPU:
            # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
            if (self.GRAPH_NAME == 'detect.tflite'):
                self.GRAPH_NAME = 'edgetpu.tflite'

        # Get path to current working directory
        CWD_PATH = os.getcwd()

        PATH_TO_CKPT = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(self.GRAPH_NAME)

        PATH_TO_LABELS = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(
            self.LABELMAP_NAME)

        PATH_TO_OBJ_NAMES = "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names"
        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Fix for potential label map issue
        if self.labels[0] == '???':
            del (self.labels[0])

        if self.use_TPU:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                           experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)

        self.interpreter.allocate_tensors()
        
        print("Model loaded and tensors allocated")

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        #print("Input details: {}".format(self.input_details))
        self.output_details = self.interpreter.get_output_details()
        #print("Output detais: {}".format(self.output_details))
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

        # Initialize frame rate calculation
        self.frame_rate_calc = 1
        self.freq = cv2.getTickFrequency()

        # Initialize video stream
        self.videostream = VideoStream(resolution=(self.imW, self.imH))
        self.videostream = self.videostream.start()

    def filter_boxes(self, box_xywh, scores, score_threshold=0.4, input_shape=tf.constant([416, 416])):
        scores_max = tf.math.reduce_max(scores, axis=-1)

        mask = scores_max >= score_threshold
        class_boxes = tf.boolean_mask(box_xywh, mask)
        pred_conf = tf.boolean_mask(scores, mask)
        class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]])
        pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]])

        box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1)

        input_shape = tf.cast(input_shape, dtype=tf.float32)

        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]

        box_mins = (box_yx - (box_hw / 2.)) / input_shape
        box_maxes = (box_yx + (box_hw / 2.)) / input_shape
        boxes = tf.concat([
            box_mins[..., 0:1],  # y_min
            box_mins[..., 1:2],  # x_min
            box_maxes[..., 0:1],  # y_max
            box_maxes[..., 1:2]  # x_max
        ], axis=-1)
        # return tf.concat([boxes, pred_conf], axis=-1)
        return (boxes, pred_conf)

    def read_class_names(self, class_file_name):
        names = {}
        with open(class_file_name, 'r') as data:
            for ID, name in enumerate(data):
                names[ID] = name.strip('\n')
        return names

    # TODO: Define cfg.YOLO.CLASSES
    def draw_bbox(self, image, bboxes, classes, show_label=True):
        num_classes = len(classes)
        image_h, image_w, _ = image.shape
        hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))

        random.seed(0)
        random.shuffle(colors)
        random.seed(None)

        out_boxes, out_scores, out_classes, num_boxes = bboxes
        for i in range(num_boxes[0]):
            if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) > num_classes: continue
            coor = out_boxes[0][i]
            coor[0] = int(coor[0] * image_h)
            coor[2] = int(coor[2] * image_h)
            coor[1] = int(coor[1] * image_w)
            coor[3] = int(coor[3] * image_w)

            fontScale = 0.5
            score = out_scores[0][i]
            class_ind = int(out_classes[0][i])
            bbox_color = colors[class_ind]
            bbox_thick = int(0.6 * (image_h + image_w) / 600)
            c1, c2 = (int(coor[1]), int(coor[0])), (int(coor[3]), int(coor[2]))
            cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

            if show_label:
                bbox_mess = '%s: %.2f' % (classes[class_ind], score)
                t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
                c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
                # OpenCV drawing functions expect integer pixel coordinates
                cv2.rectangle(image, c1, (int(c3[0]), int(c3[1])), bbox_color, -1)  # filled

                cv2.putText(image, bbox_mess, (c1[0], int(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
        return image

    def perform(self):
        while True:
            t1 = cv2.getTickCount()

            frame1 = self.videostream.read()
            
            print("Frame read from stream")

            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
            # input_data = np.expand_dims(frame_resized, axis=0)

            image_data = cv2.resize(frame, (608, 608))
            image_data = image_data / 255.

            images_data = []
            for i in range(1):
                images_data.append(image_data)
            images_data = np.asarray(images_data).astype(np.float32)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            # if self.floating_model:
            # input_data = (np.float32(input_data) - self.input_mean) / self.input_std

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], images_data)
            print("Performing detection")
            self.interpreter.invoke()
            print("Detection performed")
            pred = [self.interpreter.get_tensor(self.output_details[i]['index']) for i in
                    range(len(self.output_details))]
            boxes, pred_conf = self.filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                 input_shape=tf.constant([608, 608]))

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.3,  # TODO: Make var
                score_threshold=0.3  # TODO: Make var
            )
            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
            class_names = self.read_class_names(
                "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names")
            print("Drawing bounding boxes")
            frame = self.draw_bbox(frame, pred_bbox, class_names)
            #frame = Image.fromarray(frame.astype(np.uint8))

           # cv2.imshow('Object detector',frame.astype(np.uint8))
            time.sleep(5)
            image = cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2RGB)

            if cv2.waitKey(1) == ord('x'):
                break

            if self.item_detected:
                break

        return self.item_detected, self.latest_item

    def run(self, cloud=False):
        #while True:
            # for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):

            # Start timer (for calculating frame rate)
            t1 = cv2.getTickCount()

            # Grab frame from video stream
            frame1 = self.videostream.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (self.width, self.height))

            if cloud:
                # TODO: Send image to cloud and get data back
                content_type = 'image/jpeg'
                headers = {'content-type': content_type}

                _, img_encoded = cv2.imencode('.jpg', frame_rgb)
                request_address = "http://a24dcb00998c.ngrok.io/api/detect"
                # send http request with image and receive response
                print("Sending image to cloud api and awaiting response")
                response = requests.post(request_address, data=img_encoded.tostring(), headers=headers)
                print("Response received:")
                print(json.loads(response.text))
                # The cloud path does not produce local boxes/classes/scores, so return here
                # instead of falling through to the drawing loop below (which would fail).
                return self.item_detected, self.latest_item

            else:
                input_data = np.expand_dims(frame_resized, axis=0)

                # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
                if self.floating_model:
                    input_data = (np.float32(input_data) - self.input_mean) / self.input_std

                # Perform the actual detection by running the model with the image as input
                self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
                #print("Detection started")
                self.interpreter.invoke()
                #print("Detection complete")

                # Retrieve detection results
                #print(self.output_details)
                boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # Bounding coordinates of objects
                classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
                scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]  # Confidence of detected objects
                num = self.interpreter.get_tensor(self.output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

            max_score = 0

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                    # Specify that item has been detected
                    #self.item_detected = True
                    #if scores[i] > max_score:
                        #max_score = scores[i]
                        #self.latest_item = self.labels[int(classes[i])]


                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * self.imH)))
                    xmin = int(max(1, (boxes[i][1] * self.imW)))
                    ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                    xmax = int(min(self.imW, (boxes[i][3] * self.imW)))

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                    # Draw label
                    object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                    self.increase_detection_counter(object_name, scores[i])
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                                  (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255),
                                  cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                                2)  # Draw label text

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(self.frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 0), 2,
                        cv2.LINE_AA)

            # All the results have been drawn on the frame, so it's time to display it.
            cv2.imshow('Object detector', frame)
        
            if cv2.waitKey(1) == ord('x'):
                cv2.destroyAllWindows()
                #break
        
            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / self.freq
            self.frame_rate_calc = 1 / time1

            self.item_detected, self.latest_item = self.get_object_with_score_five()

            if self.item_detected:
                self.reset_detection_counter()

            return self.item_detected, self.latest_item

    def increase_detection_counter(self, detected_item, score):
        for item in self.detection_counter:
            if item["name"] == detected_item:
                item["counter"] += score


    def get_object_with_score_five(self):
        max_score = 0
        latest_object = "None"
        detected_object = False
        for item in self.detection_counter:
            if item["counter"] >= 5 and item["counter"] > max_score:
                latest_object = item["name"]
                detected_object = True
                max_score = item["counter"]
        return detected_object, latest_object

    def reset_detection_counter(self):
        self.detection_counter = [
            {
                "name": "apple",
                "counter": 0
            },
            {
                "name": "aubergine",
                "counter": 0
            },
            {
                "name": "banana",
                "counter": 0
            },
            {
                "name": "broccoli",
                "counter": 0
            },
            {
                "name": "cucumber",
                "counter": 0
            },
            {
                "name": "orange",
                "counter": 0
            },
            {
                "name": "paprika",
                "counter": 0
            },
            {
                "name": "pear",
                "counter": 0
            }
        ]

    def destroy(self):
        # Clean up
        cv2.destroyAllWindows()
        self.videostream.stop()
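A minimal driver sketch for the Detection class above (not part of the original example; the main-loop structure, the use of cloud=False and the KeyboardInterrupt handling are assumptions):

# Hypothetical driver loop for the Detection class above (assumed usage, not from the source)
if __name__ == '__main__':
    detector = Detection()
    try:
        while True:
            # run() performs one detection pass and reports whether an item has
            # accumulated enough confidence (see get_object_with_score_five)
            item_detected, latest_item = detector.run(cloud=False)
            if item_detected:
                print("Detected item:", latest_item)
    except KeyboardInterrupt:
        pass
    finally:
        detector.destroy()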
Ejemplo n.º 14
0
class ImageDetection:
    def __init__(self, modeldir):
        GRAPH_NAME = 'detect.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = os.path.join(CWD_PATH, modeldir, GRAPH_NAME)
        PATH_TO_LABELS = os.path.join(CWD_PATH, modeldir, LABELMAP_NAME)
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del (self.labels[0])

        self.min_conf_threshold = 0.6
        self.input_mean = 127.5
        self.input_std = 127.5
        self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

    def detect(self, image_path):
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        im_h, im_w, _ = image.shape
        image_resized = cv2.resize(image_rgb, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)
        if self.floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        classes = self.interpreter.get_tensor(
            self.output_details[1]['index'])[0]
        scores = self.interpreter.get_tensor(
            self.output_details[2]['index'])[0]
        detect_text = ""
        for i in range(len(scores)):
            if self.min_conf_threshold < scores[i] <= 1.0:
                ymin = int(max(1, (boxes[i][0] * im_h)))
                xmin = int(max(1, (boxes[i][1] * im_w)))
                ymax = int(min(im_h, (boxes[i][2] * im_h)))
                xmax = int(min(im_w, (boxes[i][3] * im_w)))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)

                object_name = self.labels[int(classes[i])]
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                label_size, base_line = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                label_ymin = max(ymin, label_size[1] + 10)
                cv2.rectangle(
                    image, (xmin, label_ymin - label_size[1] - 10),
                    (xmin + label_size[0], label_ymin + base_line - 10),
                    (255, 255, 255), cv2.FILLED)
                cv2.putText(image, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                detect_text = detect_text + " " + object_name
        cv2.imshow('Detector', image)
        os.system('echo %s | festival --tts & ' % detect_text)
        sleep(5)
        cv2.destroyAllWindows()
        return
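A short usage sketch for the ImageDetection class above (assumed, not from the original example; the model folder 'Sample_TFLite_model' and the image 'test1.jpg' are placeholder names):

# Hypothetical usage of the ImageDetection class above
detector = ImageDetection('Sample_TFLite_model')  # folder containing detect.tflite and labelmap.txt
detector.detect('test1.jpg')  # draws boxes, shows the image and speaks the detected labels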
Ejemplo n.º 15
0
    frame = cv2.imread(
        '/home/root/model/test_cat.jpg')  # reading frames from cam
    # print(frame)
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    frameo = cv2.resize(
        frame,
        (width, height))  # resizing frame to the model's expected input size (dsize is (width, height))

    input_data = np.expand_dims(frameo, axis=0)  # Expand the shape of array to [1xHxWx3]
    if floating_model:
        # Normalize pixel values for a floating (non-quantized) model
        input_data = (np.float32(input_data) / 218) - 1

    interpreter.set_tensor(input_details[0]['index'],
                           input_data)  #passing data to model
    interpreter.invoke()  #making predictions

    boundbox = interpreter.get_tensor(
        output_details[0]['index'])  #getting output
    obj_class = interpreter.get_tensor(
        output_details[1]['index'])  #getting outout
    score = interpreter.get_tensor(output_details[2]['index'])  #getting output
    num = interpreter.get_tensor(
        output_details[3]['index'])  #Always equals to 10

    for i in range(int(num)):

        top, left, bottom, right = boundbox[0][
            i]  #getting the postion of detected object
        classId = int(obj_class[0][i])  #getting class of object
Ejemplo n.º 16
0
h = img.shape[0]
w = img.shape[1]

img = cv2.resize(img, (256, 144))
img = np.asarray(img)
img = img / 255.
img = img.astype(np.float32)
img = img[np.newaxis, :, :, :]

# Tensorflow Lite
interpreter = Interpreter(model_path='model_float16_quant.tflite',
                          num_threads=4)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]['index']
output_details = interpreter.get_output_details()[0]['index']

interpreter.set_tensor(input_details, img)
interpreter.invoke()
output = interpreter.get_tensor(output_details)

print(output.shape)
out1 = output[0][:, :, 0]
out2 = output[0][:, :, 1]

out1 = np.invert((out1 > 0.5) * 255)
out2 = np.invert((out2 > 0.5) * 255)

print('out1:', out1.shape)
print('out2:', out2.shape)

out1 = cv2.resize(np.uint8(out1), (w, h))
out2 = cv2.resize(np.uint8(out2), (w, h))
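An optional follow-up to the snippet above (assumed, not part of the original): the two resized masks could be written to disk for inspection.

# Hypothetical step: save both mask channels next to the input image (file names are placeholders)
cv2.imwrite('mask_channel0.png', out1)
cv2.imwrite('mask_channel1.png', out2)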
Ejemplo n.º 17
0
class ButtDetector:
    def __init__(self):
        if TPU:
            self.interpreter = Interpreter(model_path=os.path.join(MODEL_DIR,
                                                                   'butt_detecter_quantized_edgetpu.tflite'),
                                           experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=os.path.join(MODEL_DIR, 'butt_detecter_quantized.tflite'))
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        with open(os.path.join(MODEL_DIR, "labelmap.txt"), 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        self.butt_nums = 0
        self.detected_status = False

    def detect_butts(self, frame, count_ret=False):
        if count_ret:
            # st_time = time.time()
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            im_h, im_w, _ = frame.shape
            image_resized = cv2.resize(image_rgb, (self.width, self.height))
            input_data = np.expand_dims(image_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if self.floating_model:
                input_data = (np.float32(input_data) - INPUT_MEAN) / INPUT_STD

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()
            scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            detected_butts = 0
            for i in range(len(scores)):
                if (scores[i] > THRESHOLD) and (scores[i] <= 1.0):
                    detected_butts += 1

            if self.butt_nums == 0 and detected_butts > 0:
                self.butt_nums = detected_butts
                self.detected_status = True
            elif self.butt_nums != 0 and detected_butts != 0 and self.butt_nums != detected_butts:
                self.butt_nums = detected_butts
                self.detected_status = True
            elif self.butt_nums != 0 and detected_butts == 0:
                self.butt_nums = detected_butts
                self.detected_status = False
            elif self.butt_nums != 0 and self.butt_nums == detected_butts:
                self.detected_status = False
            # print(f"[INFO] Processing Time: {time.time() - st_time}")

        cv2.putText(frame, f"The number of Butts: {self.butt_nums}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                    (0, 255, 0), 2)

        return frame
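A hypothetical driver loop for the ButtDetector class above (assumed usage; the webcam capture source, the every-5th-frame policy and the 'q' exit key are not from the original example, and the module-level constants such as MODEL_DIR and THRESHOLD are assumed to be defined as in the snippet):

# Hypothetical driver for ButtDetector (assumed usage, not from the source)
cap = cv2.VideoCapture(0)
detector = ButtDetector()
frame_idx = 0
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # Run the model only when count_ret is True; the count text is drawn on every frame
    annotated = detector.detect_butts(frame, count_ret=(frame_idx % 5 == 0))
    if detector.detected_status:
        print("Butt count changed:", detector.butt_nums)
    cv2.imshow('Butt detector', annotated)
    if cv2.waitKey(1) == ord('q'):
        break
    frame_idx += 1
cap.release()
cv2.destroyAllWindows()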
Ejemplo n.º 18
0
def process_frame(frame):

    global entry, lime_count, marker_count, lime_sizes, found_list, total_marker_width, pixel_per_metric
    interpreter = Interpreter(model_path=PATH_TO_CKPT, num_threads=4)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #frame_rgb = frame
    frame_resized = cv2.resize(frame_rgb, (width, height))
    #frame_resized = cv2.resize(frame_rgb, (480, 320))
    input_data = np.expand_dims(frame_resized, axis=0)
    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    try:
        start_time = time.time()
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        elapsed_time.append(time.time() - start_time)
    except:
        print('Thread Error: interpreter not referenced')
    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[
        0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(
        output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(
        output_details[2]['index'])[0]  # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))

            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4)

            # Draw label
            object_name = labels[int(
                classes[i]
            )]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                  )  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label,
                                                  cv2.FONT_HERSHEY_SIMPLEX,
                                                  0.7, 2)  # Get font size
            label_ymin = max(
                ymin, labelSize[1] +
                10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                          (xmin + labelSize[0], label_ymin + baseLine - 10),
                          (255, 255, 255),
                          cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                        2)  # Draw label text

            # counting objects and measure diameter of lime
            if xmin < LINE2 and xmax > LINE1 and not entry:
                entry = True

            if entry and xmax <= LINE1:
                entry = False

                if (int(classes[i]) + 1 == 1):
                    lime_found = time.time() - start_total_time

                    try:
                        lime_count += 1
                        lime_diameter = (
                            (xmax - xmin) +
                            (ymax - ymin)) / (2 * pixel_per_metric)
                        lime_sizes.append(lime_diameter)
                        found_list.append(lime_found)
                        print(
                            f'lime {lime_count} is found at {lime_found}, Diameter(size): {lime_diameter * 1000:.3f} mm'
                        )
                    except:
                        # marker must come first for calculating pixel/metric
                        lime_count -= 1
                        marker_count += 1
                        total_marker_width += ((xmax - xmin) +
                                               (ymax - ymin)) / 2
                        pixel_per_metric = (total_marker_width /
                                            marker_count) / MARKER_DIAMETER

                elif (int(classes[i]) + 1 == 2):
                    marker_count += 1
                    total_marker_width += ((xmax - xmin) + (ymax - ymin)) / 2

                    pixel_per_metric = (total_marker_width /
                                        marker_count) / MARKER_DIAMETER
    # insert Lime Count information text
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(
        frame,
        'Lime Count: ' + str(lime_count),
        (10, 35),
        font,
        0.8,
        (0, 0xFF, 0xFF),
        2,
        cv2.LINE_AA,
    )

    # insert Marker Count information text
    cv2.putText(
        frame,
        'Marker Count: ' + str(marker_count),
        (10, 55),
        font,
        0.8,
        (0, 0xFF, 0xFF),
        2,
        cv2.LINE_AA,
    )

    # overlay with line
    pt1 = (LINE1, 0)
    pt2 = (LINE1, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)

    pt1 = (LINE2, 0)
    pt2 = (LINE2, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)

    frame = cv2.resize(frame, (480, 320))

    return frame
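A hypothetical way of feeding frames into process_frame above (assumed; the video file name and the surrounding setup of the module-level globals such as entry, lime_count and pixel_per_metric are not shown in the original snippet):

# Hypothetical feeding loop for process_frame (assumed usage, placeholder video source)
cap = cv2.VideoCapture('limes.mp4')
while True:
    ok, frame = cap.read()
    if not ok:
        break
    annotated = process_frame(frame)
    cv2.imshow('Lime counter', annotated)
    if cv2.waitKey(1) == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()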
Ejemplo n.º 19
0
def detection():

    # Define and parse input arguments
    #parser = argparse.ArgumentParser()
    #parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
    #	            required=True)
    #parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
    #	            default='detect.tflite')
    #parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
    #	            default='labelmap.txt')
    #parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
    #	            default=0.5)
    #parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir',
    #	            default=None)
    #parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.',
    #	            default=None)
    #parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
    #	            action='store_true')

    #args = parser.parse_args()

    MODEL_NAME = 'Sample_TFLite_model'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = float(0.5)
    use_TPU = None

    # Parse input image name and directory.
    IM_NAME = None
    IM_DIR = None

    # If both an image AND a folder are specified, throw an error
    if (IM_NAME and IM_DIR):
        print(
            'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.'
        )
        sys.exit()

    # If neither an image nor a folder is specified, default to using 'test1.jpg' for the image name
    if (not IM_NAME and not IM_DIR):
        IM_NAME = 'test1.jpg'

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')

    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Loop over every image and perform detection
    for image_path in images:

        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(
            output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(
            output_details[2]['index'])[0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))

                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)

                # Draw label
                object_name = labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                score_detection = int(scores[i] * 100)
                #print("Your object is a ",object_name, " by : ",test_reCup, "%")
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    image, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(image, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
                return (object_name == 'cup' and score_detection >= 55)

        # All the results have been drawn on the image, now display the image
        cv2.imshow('Object detector', image)

        # Press any key to continue to next image, or press 'q' to quit
        if cv2.waitKey(0) == ord('q'):
            break

    # Clean up
    cv2.destroyAllWindows()
def object_detection():
    label_out = []
    mid_x_out = []
    mid_y_out = []

    class VideoStream:
        """Camera object that controls video streaming from the Picamera"""
        def __init__(self, resolution=(640, 480), framerate=30):
            # Initialize the PiCamera and the camera image stream
            self.stream = cv2.VideoCapture(0)
            ret = self.stream.set(cv2.CAP_PROP_FOURCC,
                                  cv2.VideoWriter_fourcc(*'MJPG'))
            ret = self.stream.set(3, resolution[0])
            ret = self.stream.set(4, resolution[1])

            # Read first frame from the stream
            (self.grabbed, self.frame) = self.stream.read()

            # Variable to control when the camera is stopped
            self.stopped = False

        def start(self):
            # Start the thread that reads frames from the video stream
            Thread(target=self.update, args=()).start()
            return self

        def update(self):
            # Keep looping indefinitely until the thread is stopped
            while True:
                # If the camera is stopped, stop the thread
                if self.stopped:
                    # Close camera resources
                    self.stream.release()
                    return

                # Otherwise, grab the next frame from the stream
                (self.grabbed, self.frame) = self.stream.read()

        def read(self):
            # Return the most recent frame
            return self.frame

        def stop(self):
            # Indicate that the camera and thread should be stopped
            self.stopped = True

    # Define and parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir',
                        help='Folder the .tflite file is located in',
                        required=True)
    parser.add_argument(
        '--graph',
        help='Name of the .tflite file, if different than detect.tflite',
        default='detect.tflite')
    parser.add_argument(
        '--labels',
        help='Name of the labelmap file, if different than labelmap.txt',
        default='labelmap.txt')
    parser.add_argument(
        '--threshold',
        help='Minimum confidence threshold for displaying detected objects',
        default=0.5)
    parser.add_argument(
        '--resolution',
        help=
        'Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
        default='1280x720')
    parser.add_argument(
        '--edgetpu',
        help='Use Coral Edge TPU Accelerator to speed up detection',
        action='store_true')

    args = parser.parse_args()

    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    min_conf_threshold = float(args.threshold)
    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)
    use_TPU = args.edgetpu

    # Import TensorFlow libraries
    # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
    while True:
        flag = 0
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(
            output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(
            output_details[2]['index'])[0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)
                cv2.circle(frame, (xmin, ymin), 5, (255, 255, 0), cv2.FILLED)
                cv2.circle(frame, (xmax, ymax), 5, (0, 255, 255), cv2.FILLED)
                x_diff = xmax - xmin
                y_diff = ymax - ymin
                mid_x = x_diff / 2 + xmin
                mid_x = math.ceil(mid_x)
                mid_y = ymin + y_diff / 2
                mid_y = math.ceil(mid_y)
                cv2.circle(frame, (0, 0), 5, (0, 0, 255), cv2.FILLED)
                cv2.circle(frame, (mid_x, mid_y), 5, (255, 255, 255),
                           cv2.FILLED)

                # Draw label
                object_name = labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
                label_out.append(label)
                mid_x_out.append(mid_x)
                mid_y_out.append(mid_y)
        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        #cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1
        (h, w) = frame.shape[:2]
        cv2.waitKey(100)
        break
    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
    return (label_out, mid_x_out, mid_y_out, h / 2, w / 2)
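A hypothetical call site for object_detection() above (assumed; since the function parses its own command-line arguments, the script would need to be launched with something like --modeldir Sample_TFLite_model):

# Hypothetical use of the returned labels and box centres (assumed, not from the source)
labels_found, xs, ys, half_h, half_w = object_detection()
for lbl, x, y in zip(labels_found, xs, ys):
    # each label comes with the pixel centre of its bounding box;
    # (half_w, half_h) is the centre of the captured frame itself
    print(lbl, 'centred at', (x, y), '- frame centre:', (half_w, half_h))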
Ejemplo n.º 21
0
    def frames():

        # configure arguments -- begin
        MODEL_NAME = 'Sample_TFlite_model'
        GRAPH_NAME = 'objectdetect.tflite'
        LABELMAP_NAME = 'labelmapobjectdetect.txt'
        min_conf_threshold = float(0.7)
        imW, imH = 640, 320
        # configure arguments --end

        # Import TensorFlow libraries
        # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
        pkg = importlib.util.find_spec('tflite_runtime')
        if pkg:
            from tflite_runtime.interpreter import Interpreter
        else:
            from tensorflow.lite.python.interpreter import Interpreter

        # Get path to current working directory
        CWD_PATH = os.getcwd()

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if labels[0] == '???':
            del (labels[0])


        # Load the Tensorflow Lite model.
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

        interpreter.allocate_tensors()

        # Get model details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]

        floating_model = (input_details[0]['dtype'] == np.float32)

        input_mean = 127.5
        input_std = 127.5

        frame_rate_calc = 1
        freq = cv2.getTickFrequency()

        # Initialize video stream
        videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
        time.sleep(1)

        #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
        while True:
            t1 = cv2.getTickCount()
            # Grab frame from video stream
            frame1 = videostream.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (width, height))
            input_data = np.expand_dims(frame_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            # Perform the actual detection by running the model with the image as input
            interpreter.set_tensor(input_details[0]['index'], input_data)
            interpreter.invoke()

            # Retrieve detection results
            boxes = interpreter.get_tensor(output_details[0]['index'])[
                0]  # Bounding box coordinates of detected objects
            classes = interpreter.get_tensor(output_details[1]['index'])[
                0]  # Class index of detected objects
            scores = interpreter.get_tensor(output_details[2]['index'])[
                0]  # Confidence of detected objects

            # boxes: [ymin, xmin, ymax, xmax]

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if (scores[i] > min_conf_threshold) and (scores[i] < 1):
                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * imH)))
                    xmin = int(max(1, (boxes[i][1] * imW)))
                    ymax = int(min(imH, (boxes[i][2] * imH)))
                    xmax = int(min(imW, (boxes[i][3] * imW)))

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (10, 255, 0), 2)

                    # Draw label
                    object_name = labels[int(
                        classes[i]
                    )]  # Look up object name from "labels" array using class index
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                          )  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        2)  # Get font size
                    label_ymin = max(
                        ymin, labelSize[1] + 10
                    )  # Make sure not to draw label too close to top of window
                    cv2.rectangle(
                        frame, (xmin, label_ymin - labelSize[1] - 10),
                        (xmin + labelSize[0], label_ymin + baseLine - 10),
                        (255, 255, 255),
                        cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                                2)  # Draw label text

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc),
                        (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0),
                        2, cv2.LINE_AA)

            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            # encode as a jpeg image and return it
            yield cv2.imencode('.jpg', frame)[1].tobytes()
def objectsCount(MODEL_NAME, GRAPH_NAME, LABELMAP_NAME, min_conf_threshold,
                 use_TPU, IM_NAME, IM_DIR):
    import os
    import cv2
    import numpy as np
    import sys
    import glob
    import importlib.util

    # If both an image AND a folder are specified, throw an error
    if (IM_NAME and IM_DIR):
        print(
            'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.'
        )
        sys.exit()

    # If neither an image nor a folder is specified, default to using 'test1.jpg' for the image name
    if (not IM_NAME and not IM_DIR):
        IM_NAME = 'test1.jpg'

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')

    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])

    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    objects_list = {}  # dictionary mapping each image path to the number of cars detected in it

    # Loop over every image and perform detection
    for image_path in images:

        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(
            output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(
            output_details[2]['index'])[0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        objects_count = 0  # Initialize the car count for this image

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

                # Draw label
                object_name = labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                if (object_name == 'car'):
                    objects_count += 1  # Count the cars detected in this image

        objects_list[image_path] = objects_count
    return objects_list
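
A minimal usage sketch for objectsCount; the model folder and image directory names below are placeholders, not values from the original code.

counts = objectsCount(MODEL_NAME='obj_detection_tflite',   # placeholder model folder
                      GRAPH_NAME='detect.tflite',
                      LABELMAP_NAME='labelmap.txt',
                      min_conf_threshold=0.5,
                      use_TPU=False,
                      IM_NAME=None,
                      IM_DIR='traffic_images')              # placeholder image folder
for path, n_cars in counts.items():
    print(path, '->', n_cars, 'cars detected')
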
Ejemplo n.º 23
0
class Detector:
    """
    Perform object detection with the given model. The model is a quantized tflite
    file which if the detector can not find it at the path it will download it
    from neuralet repository automatically.

    :param config: Is a ConfigEngine instance which provides necessary parameters.
    """
    def __init__(self, config):
        self.config = config
        # Get the model name from the config
        self.model_name = self.config.get_section_dict('Detector')['Name']
        # Frames Per Second
        self.fps = None
        self.model_file = 'ped_ssd_mobilenet_v2_quantized_edgetpu.tflite'
        self.model_path = '/repo/data/edgetpu/' + self.model_file

        # Get the model .tflite file path from the config.
        # If there is no .tflite file in the path it will be downloaded automatically from base_url
        user_model_path = self.config.get_section_dict('Detector')['ModelPath']
        if len(user_model_path) > 0:
            print('using %s as model' % user_model_path)
            self.model_path = user_model_path
        else:
            base_url = 'https://media.githubusercontent.com/media/neuralet/neuralet-models/master/edge-tpu/'
            url = base_url + self.model_name + '/' + self.model_file

            if not os.path.isfile(self.model_path):
                print('model does not exist under: ', self.model_path,
                      'downloading from ', url)
                wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        self.interpreter = Interpreter(
            self.model_path,
            experimental_delegates=[load_delegate("libedgetpu.so.1")])
        self.interpreter.allocate_tensors()
        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get class id from config
        self.class_id = int(
            self.config.get_section_dict('Detector')['ClassID'])
        self.score_threshold = float(
            self.config.get_section_dict('Detector')['MinScore'])

    def inference(self, resized_rgb_image):
        """
        inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding detection output which is used for creating result
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]
        """
        input_image = np.expand_dims(resized_rgb_image, axis=0)
        # Fill input tensor with input_image
        self.interpreter.set_tensor(self.input_details[0]["index"],
                                    input_image)
        t_begin = time.perf_counter()
        self.interpreter.invoke()
        inference_time = time.perf_counter() - t_begin  # Second
        self.fps = convert_infr_time_to_fps(inference_time)
        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        labels = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        # TODO: will be used for getting number of objects
        # num = self.interpreter.get_tensor(self.output_details[3]['index'])

        result = []
        for i in range(boxes.shape[1]):  # number of boxes
            if labels[0,
                      i] == self.class_id and scores[0,
                                                     i] > self.score_threshold:
                result.append({
                    "id": str(self.class_id) + '-' + str(i),
                    "bbox": boxes[0, i, :],
                    "score": scores[0, i]
                })

        return result
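
A minimal usage sketch for the Detector class, assuming a ConfigEngine-style config object and a frame already resized to the model input size; both are placeholders here.

# Hypothetical usage: `config` is a ConfigEngine instance and `resized_rgb_image` is a uint8
# frame already resized to the model input size (both placeholders here).
detector = Detector(config)
objects = detector.inference(resized_rgb_image)
for obj in objects:
    print(obj["id"], obj["score"], obj["bbox"])
print("inference FPS:", detector.fps)
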
def gen_frames():
    # Define VideoStream class to handle streaming of video from webcam in separate processing thread
    # Source - Adrian Rosebrock, PyImageSearch: https://www.pyimagesearch.com/2015/12/28/increasing-raspberry-pi-fps-with-python-and-opencv/
    class VideoStream(object):
        """Camera object that controls video streaming from the Picamera"""
        def __init__(self,resolution=(640,480),framerate=30,target=None,args=()):
            global capture_image_limit
            capture_image_limit = 2000
            
            global file_save_id
            file_save_id = 0
            
            # Initialize the PiCamera and the camera image stream
            self.stream = cv2.VideoCapture(0)
 
            #VideoStream Instance
            instance = VideoStream.__qualname__
            print('The class instance is: ',instance)
            #print('\nVIDEOSTREAM: locals() value inside class\n', locals())
            #print(dir(VideoStream))
 
            #Reload
            reloadClass = os.environ.get('reload')
            if reloadClass == 'True':
                print('Delete Self:')
                del self
                os.environ['reload'] = 'False'          
                        
            ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
            ret = self.stream.set(3,resolution[0])
            ret = self.stream.set(4,resolution[1])
                
            # Read first frame from the stream
            (self.grabbed, self.frame) = self.stream.read()

            # Variable to control when the camera is stopped
            self.stopped = False            
            
        def __del__(self):
            print("Object destroyed")

        def start(self):
            # Start the thread that reads frames from the video stream
            Thread(target=self.update, args=()).start()
            return self

        def update(self):
            # Keep looping indefinitely until the thread is stopped
            while True:
                # If the camera is stopped, stop the thread
                if self.stopped:
                    # Close camera resources
                    self.stream.release()
                    return
                    
                # Otherwise, grab the next frame from the stream
                (self.grabbed, self.frame) = self.stream.read()

        def read(self):
            # Return the most recent frame
            return self.frame

        def stop(self):
            # Indicate that the camera and thread should be stopped
            self.stopped = True


    # Define and parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
                        required=True)
    parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
                        default='detect.tflite')
    parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
                        default='labelmap.txt')
    parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                        default=0.5)
    parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
                        default='1280x720')
    parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
                        action='store_true')

    args = parser.parse_args()
    

    MODEL_NAME = args.modeldir
    print('~~~~ Param Default Model Name: ' + str(MODEL_NAME))
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    min_conf_threshold = float(args.threshold)
    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)
    use_TPU = args.edgetpu


    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    print('tflite_runtime package: ' + str(pkg))

    if pkg:
        from tflite_runtime.interpreter import Interpreter 
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate 
    else:
        from tensorflow.lite.python.interpreter import Interpreter 
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate 

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'       

    # Get path to current working directory
    # Multi-Model
    # Demo90 /home/pi/SensorFusion/Demo90
    # Deer: /home/pi/SensorFusion/PreLoadedModels/Model01.Deer
    # Head: /home/pi/SensorFusion/PreLoadedModels/Model02.Head
    # Eyes: /home/pi/SensorFusion/PreLoadedModels/Model03.Eyes
    # Tree: /home/pi/SensorFusion/PreLoadedModels/Model04.Tree
    # check.id - cd /home/pi/SensorFusion/checkid

    CWD_PATH = os.getcwd()
    print("Default Path: "+ CWD_PATH)
     
    newModel = str(os.environ.get('run_model'))
    
    print("New Model Name: "+ newModel)
    
    if newModel == "Demo90":
        CWD_PATH = "/home/pi/SensorFusion/"+ newModel 
    elif newModel == 'Check.ID':
        CWD_PATH = "/home/pi/SensorFusion/checkid" 
    else:
        CWD_PATH = "/home/pi/SensorFusion/PreLoadedModels/"+ newModel 
            
        print("Current Model Path: "+ CWD_PATH)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
    print("Current Path to Label Map: "+ PATH_TO_LABELS)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del(labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    
    #if video_camera_flag:#Using a Flag here - for future use
    if use_TPU:
        interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                  experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print('Using Edge TPU delegate: ' + PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)
        print('Running without Edge TPU: ' + PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
    time.sleep(1)
    
    global img_counter
    img_counter = 0
  
    #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
    try:
        while True:
        #while video_camera_flag:
            
            # Start timer (for calculating frame rate)
            t1 = cv2.getTickCount()

            # Grab frame from video stream
            frame1 = videostream.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            global frame
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (width, height))
            input_data = np.expand_dims(frame_resized, axis=0)
            
            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            # Perform the actual detection by running the model with the image as input
            interpreter.set_tensor(input_details[0]['index'],input_data)
            interpreter.invoke()

            # Retrieve detection results           
            person_found = False
            
            boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
            classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
            scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
            #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)
            
            # Suppress TensorFlow detection overlays while annotating
            kill_tensorFlow = os.environ.get('kill_tensorFlow')
            #print("TensorFlow Status: " + str(kill_tensorFlow))

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
                    
                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1,(boxes[i][0] * imH)))
                    xmin = int(max(1,(boxes[i][1] * imW)))
                    ymax = int(min(imH,(boxes[i][2] * imH)))
                    xmax = int(min(imW,(boxes[i][3] * imW)))
                    
                    #print("Kill TF Flag: "+ str(kill_tensorFlow))
                    if kill_tensorFlow != 'True':
                        try:
                            cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 3)
                        except:
                            pass

                    # Draw label (object_name) and score (%)    
                    
                    object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index 
                    #print(labels[int(classes[i])]+": "+str(i))  
                    
                    
                    if labels[int(classes[0])] == 'person':  # NOTE - the bar is for one person only
                        #print('Person Found!')
                        person_found = True# used for bar below
                    
                    scores_flag = os.environ.get('scores_flag')
                    labels_flag = os.environ.get('labels_flag')
                    
                    # Display states for labels/scores (binary: labels, scores)
                    state_ = 11  # both on by default
                    if labels_flag == 'labels_off' and scores_flag == 'scores_off':
                        state_ = 0  # 00
                        label = ''  # no text is drawn in this state

                    if labels_flag == 'labels_on' and scores_flag == 'scores_on':
                        state_ = 11  # 11
                        label = '%s: %d%%' % (object_name.capitalize(), int(scores[i]*100))  # Example: 'Person: 72%'

                    if labels_flag == 'labels_off' and scores_flag == 'scores_on':
                        state_ = 1  # 01
                        label = '%d%%' % (int(scores[i]*100))  # Example: '72%'

                    if labels_flag == 'labels_on' and scores_flag == 'scores_off':
                        state_ = 10  # 10
                        label = '%s: ' % (object_name.capitalize())  # Example: 'Person: '
                    
                    #draw the labels, background score and box
                    if state_ != 0:
                        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size                        
                        label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window                    
                        #cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (237,237,237), cv2.FILLED) # Draw white box to put label text in                   
                        if kill_tensorFlow != 'True':
                            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (128,128,128), cv2.FILLED) # Draw gray box to put label text in                   
                            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) # Draw label text
                    else:
                        if kill_tensorFlow != 'True':
                            cv2.rectangle(frame, (xmin,ymin), (xmin,ymin), (237,237,237), cv2.FILLED) # Draw frame with no label OR score text !
                        
            # Draw framerate in corner of frame - use 'F' key to toggle on/off
            try:
                if fps_flag:
                    cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)
                else:
                    pass
            except:
                pass
            
            #If Capture Image Draw status text
            capture_flag = os.environ.get('cap_flag')    
            try:
                if capture_flag == "True":    
                    cv2.putText(frame,'Saving File: '+str(img_counter),(520,50),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,0,255),2)
                else:
                    pass
                
            except:
                pass
                
            # All the results have been drawn on the frame, so it's time to display it.
            #cv2.imshow('Object detector', frame) ## Commented for the FLASK API 
 
            #Module widgets.meter()
            if kill_tensorFlow != 'True':
                #window_name ='Object detector'                     
                top = int(scores[0]*100)
                color = (0,0,255)
                if person_found == True:
                     widgets.meter(frame,top)#module
            #End Module

            # Displaying the image - DO NOT USE!
            #cv2.imshow(window_name, image)  
          
            # SENSOR FUSION Flask VIDEO API
            # Brute-force Motion JPEG: OpenCV captures raw frames by default, so each frame
            # must be encoded as JPEG to display the video stream correctly.
            # NOTE: cv2.imencode/tobytes cuts the apparent frame rate by about 50%, and the UI adds some overhead.
            # See: https://www.pyimagesearch.com/2017/02/06/faster-video-file-fps-with-cv2-videocapture-and-opencv/
            
            ret, buffer = cv2.imencode('.jpg', frame)
            frame2 = buffer.tobytes() #the image that is saved           
           
            #Capture Images and save to Annotate Named subdirectory under ~/Pictures
            #capture_flag = os.environ.get('cap_flag')
            annotate_name = os.environ.get('annotate_name')           
                        
            
            if capture_flag == 'True':
                # Check the capture limit; anno_images is expected to be set elsewhere (e.g. by the web UI)
                try:
                    print("image limit: " + anno_images)
                    capture_image_limit = int(anno_images)
                except:
                    pass
      
            if capture_flag == 'True' and img_counter < capture_image_limit:
                #Create new or use existing directory
                path_to_directory = '../Pictures/' + annotate_name
                print("Saving to ", path_to_directory)
                try:
                    os.makedirs(path_to_directory)
                except FileExistsError:
                    #dir already exists, so overwrite existing (unless we datestamp)!
                    pass
                img_name="../Pictures/"+annotate_name+"/"+annotate_name+"sf-frame_{}.jpg".format(img_counter)
            
                cv2.namedWindow("Capture Window")
                cv2.moveWindow("Capture Window", -500, -500)# push it off screen :)
                
                cv2.imwrite(img_name, frame1)
                print('Wrote Image-'+ img_name)
                
                
                img_counter +=1
                
            #Clear Capture Flag when done grabbing images
            if capture_flag == 'True' and img_counter >= capture_image_limit:
                os.environ['cap_flag'] = 'False'
                img_counter = 0
                   
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame2 + b'\r\n')  # concat frame one by one and show result
            ## End Video Stream API ###

            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2-t1)/freq
            frame_rate_calc= 1/time1
                       
            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                print("CV2 Break")
                break
        
            
            # Quit when the web UI sets the quit flag
            quit_flag = os.environ.get('quit_flag')
            if quit_flag == 'quit':
                os.environ['quit_flag'] = ''
                print("CV2 Quit " + quit_flag)
                cv2.destroyAllWindows()
                if videostream:
                    #videostream.release()
                    videostream.stop()
                    print('Videostream stopped')
                break
            
                
            #print("quit_flag " + str(quit_flag))    
        # Clean up
        cv2.destroyAllWindows()
        if videostream:
            #videostream.release()
            videostream.stop()
            #os.system("pkill chromium")
        #webbrowser.open('http://localhost:5000', new=0)
            
    except KeyboardInterrupt:
        pass
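
gen_frames() is controlled through environment variables read inside its loop (run_model, labels_flag, scores_flag, kill_tensorFlow, cap_flag, annotate_name, quit_flag). A minimal sketch of driving it from elsewhere in the application, with placeholder values:

import os

# Select a preloaded model and draw both labels and scores
os.environ['run_model'] = 'Demo90'
os.environ['labels_flag'] = 'labels_on'
os.environ['scores_flag'] = 'scores_on'
os.environ['kill_tensorFlow'] = 'False'

# Trigger image capture for annotation (placeholder dataset name), then request a clean shutdown later
os.environ['annotate_name'] = 'my_dataset'
os.environ['cap_flag'] = 'True'
# ... later ...
os.environ['quit_flag'] = 'quit'
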
Ejemplo n.º 25
0
    image_height = image.shape[0]
    image_width = image.shape[1]

    # Resize and normalize image for network input
    t3 = time.perf_counter()
    frame = cv2.resize(image, (300, 300))
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = np.expand_dims(frame, axis=0)
    frame = frame.astype(np.float32)
    cv2.normalize(frame, frame, -1, 1, cv2.NORM_MINMAX)
    t4 = time.perf_counter()
    print("resize and normalize time: ", t4 - t3)

    # run model
    t5 = time.perf_counter()
    interpreter.set_tensor(input_details[0]['index'], frame)
    interpreter.invoke()
    t6 = time.perf_counter()
    print("inference + postprocess time: ", t6 - t5)

    # get results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
    classes = interpreter.get_tensor(output_details[1]['index'])[0]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]
    count = interpreter.get_tensor(output_details[3]['index'])[0]

    for i, (box, classidx, score) in enumerate(zip(boxes, classes, scores)):
        probability = score
        if probability >= 0.6:
            if (not math.isnan(box[0]) and not math.isnan(box[1])
                    and not math.isnan(box[2]) and not math.isnan(box[3])):
Ejemplo n.º 26
0
class ImageProcessing:

    
    import numpy as np
    import cv2
    import math
    from threading import Thread
    from multiprocessing import Process
    from threading import Timer
    import os

    def __init__(self, resolution, flag):
        
        self.height, self.width = resolution
        self.resolution = self.height, self.width
        
        self.index=0
        self.mode = 0
        self.flag = flag
        
        self.camera = self.cv2.VideoCapture(-1)
        self.camera.set(self.cv2.CAP_PROP_FRAME_HEIGHT, self.height)
        self.camera.set(self.cv2.CAP_PROP_FRAME_WIDTH, self.width)
        
        self._setROI()
        
        self._setCoral()
        
        self._getDashboard()
        
        self._getIpmMat()
        
        self._getMaskBG()
        
        
                
    def _setROI(self):
        self.ROI_W = int(self.width*0.2)
        self.ROI_H = int(self.height*0.35)
        self.ROI_far_pos = 0.65 # Set according to your vehicle velocity and frame rate | Late: far, Fast: near
        self.ROI_far_rngH = slice(int(self.height*self.ROI_far_pos - self.ROI_H), int(self.height*self.ROI_far_pos))
        self.ROI_near_rngH = slice(int(self.height - self.ROI_H), int(self.height))
        self.ROI_rngW = slice(int(self.width/2 - self.ROI_W/2),int(self.width/2 + self.ROI_W/2))
        self.ROI_lane_rngH = slice(int(self.height*0.7 - self.ROI_H*0.7), int(self.height*0.7))
        self.ROI_lane_rngW = slice(int(self.width/2 - self.ROI_W*0.5),int(self.width/2 + self.ROI_W*0.5))
        
    def _getIpmMat(self):
        
        # Camera intrinsic & extrinsic parameters
        alpha = (7-90)*self.np.pi/180
        beta = 0
        gamma = 0
        dist = 300
        focal = 500
        
        # Calculate the rotation, translation, and projection matrices for the IPM transform
        A1 = self.np.array([[1,0,-self.width/2],[0,1,-self.height/2],[0,0,0],[0,0,1]],dtype='f')
        RX = self.np.array([[1,0,0,0],[0,self.math.cos(alpha), -self.math.sin(alpha),0],[0,self.math.sin(alpha),self.math.cos(alpha),0],[0,0,0,1]],dtype='f')
        RY = self.np.array([[self.math.cos(beta),0,-self.math.sin(beta),0],[0,1,0,0],[self.math.sin(beta),0,self.math.cos(beta),0],[0,0,0,1]],dtype='f')
        RZ = self.np.array([[self.math.cos(gamma),-self.math.sin(gamma),0,0],[self.math.sin(gamma),self.math.cos(gamma),0,0],[0,0,1,0],[0,0,0,1]],dtype='f')
        R = self.np.dot(RX,self.np.dot(RY,RZ))
        T = self.np.array([[1,0,0,0],[0,1,0,0],[0,0,1,dist],[0,0,0,1]],dtype='f')
        K = self.np.array([[focal,0,self.width/2,0],[0,focal,self.height/2,0],[0,0,1,0]],dtype='f')
        self.IpmMat = self.np.dot(K,self.np.dot(T,self.np.dot(R,A1)))
        
    
        
        
    def _getMaskBG(self):
        # Mask for the blank area produced when images are IPM-mapped
        tmp_blank = self.np.full((self.height, self.width,3), 255, dtype='uint8')
        tmp_ipm = self.cv2.warpPerspective(tmp_blank, self.IpmMat, (self.width, self.height), flags=self.cv2.INTER_CUBIC|self.cv2.WARP_INVERSE_MAP)
        self.maskBG =  self.cv2.bitwise_not(tmp_ipm)
        
    def _getDashboard(self):
        self.board_size = 320
        size = self.board_size
        self.board = self.np.full((size*2, size*2,3), 255, dtype='uint8')
        tmp_stopline = self.cv2.imread('dashboard/stopline_red.jpg',self.cv2.IMREAD_COLOR)
        self.icon_stopline = self.cv2.resize(tmp_stopline, dsize=(size,size), interpolation=self.cv2.INTER_AREA)
        tmp_blindspot = self.cv2.imread('dashboard/blind_spot_red.jpg',self.cv2.IMREAD_COLOR)
        self.icon_blindspot = self.cv2.resize(tmp_blindspot, dsize=(size,size), interpolation=self.cv2.INTER_AREA)
        self.icon_blank = self.np.full((size,size*2,3),255,dtype='uint8')
        self.icon_schoolzone = self.np.full((size,size*2,3),0,dtype='uint8')
        self.icon_schoolzone[:,:,2] = 255
        self.icon_subs = self.np.full((size,size,3),0,dtype='uint8')
        self.icon_subs[:,:,2] = 255
        
               
        
        
    def processing(self):
        # Calibration parameters by experiments
        CamMat = self.np.array([[314.484, 0, 321.999],[0, 315.110, 259.722],[ 0, 0, 1]],dtype='f')
        DistMat = self.np.array([ -0.332015,    0.108453,    0.001100,    0.002183],dtype='f')
        
        # HSV ranges for OpenCV's inRange function
        # Adjust the values to your brightness conditions
        lower_k = self.np.array([0,0,0])
        upper_k = self.np.array([180,255,100])
        lower_r1 = self.np.array([0,50,50])
        upper_r1 = self.np.array([30,255,255])
        lower_r2 = self.np.array([150,50,50])
        upper_r2 = self.np.array([180,255,255])
        
        
        ret, frame = self.camera.read(); del(ret)  # Grab a frame from the camera
        calibration = self.cv2.undistort(frame, CamMat, DistMat, None, CamMat)  # Undistort the wide-angle camera image
        tmp_ipm1 = self.cv2.warpPerspective(calibration, self.IpmMat, (self.width, self.height), flags=self.cv2.INTER_CUBIC | self.cv2.WARP_INVERSE_MAP)  # Warp the image to a top-view (IPM) perspective
        tmp_ipm2 = self.cv2.add(tmp_ipm1, self.maskBG)  # Merge the IPM image with the white background mask
        ipm = self.cv2.bilateralFilter(tmp_ipm2, 9, 50, 50)  # Edge-preserving smoothing filter
        self.result = ipm.copy()
        hsv = self.cv2.cvtColor(ipm, self.cv2.COLOR_BGR2HSV)
        gray = self.cv2.cvtColor(ipm, self.cv2.COLOR_BGR2GRAY)
        
        #canny = self.cv2.Canny(gray, 100, 200, 3) # If you want to use canny edge algorithm, activate this line
        threshold_inv = self.cv2.adaptiveThreshold(gray, 255, self.cv2.ADAPTIVE_THRESH_MEAN_C, self.cv2.THRESH_BINARY, 21, 5)
        threshold = self.cv2.bitwise_not(threshold_inv)
        #mask_k = self.cv2.inRange(hsv, lower_k, upper_k)
        #mask_k = canny.copy()
        mask_k = threshold.copy()
        self.mask_k = mask_k[self.ROI_far_rngH, self.ROI_rngW]#[self.ROI_far_rngH, self.ROI_rngW]
        self.mask_lane = mask_k[self.ROI_lane_rngH,self.ROI_lane_rngW]
        
        # Build the red mask used for school-zone detection
        mask_r1 = self.cv2.inRange(hsv, lower_r1, upper_r1)
        mask_r2 = self.cv2.inRange(hsv, lower_r2, upper_r2)
        mask_r = self.cv2.add(mask_r1, mask_r2)
        self.mask_r = mask_r[self.ROI_near_rngH, self.ROI_rngW]
        
        
    def detectingSchoolzone(self):
        # Just counting red dots
        if((self.np.sum(self.mask_r)/255) > ((self.ROI_H)*(self.ROI_W)*0.2)):
            self.flag.schoolzone = True
        else:
            self.flag.schoolzone = False
    
    def laneDetect(self):
        # Lane detection algorithm by Jinwon.
        # Adaptive detection method; still needs improvement.
        frame = self.cv2.flip(self.mask_lane.copy(),0)
        
        H,W = frame.shape[0:2]
        
        lane_base = self.np.array(range(0,W))
        
        
        lane = self.np.full((H,1), int(W/2), dtype='uint32')
        laneL = self.np.full((H,1), int(W/2), dtype='uint32')
        laneR = self.np.full((H,1), int(W/2), dtype='uint32')
        
        
        num0 = self.np.sum(frame[0,:] != False)
        if(num0 != 0): lane[0] = int(self.np.sum(lane_base*frame[0,:])/(255*num0))
        else: lane[0] = int(W/2)
        
        for j in range(1,H):
            rangeL = range(0, int(lane[j-1]))
            rangeR = range(int(lane[j-1]), int(W))
            numL = self.np.sum(frame[j,rangeL] !=  False)
            numR = self.np.sum(frame[j,rangeR] !=  False)
            if(numL == 0)|(numR == 0): lane[j] = lane[j-1]
            else:
                laneL[j] = self.np.sum(lane_base[rangeL]*frame[j,rangeL])/(255*numL)
                laneR[j] = self.np.sum(lane_base[rangeR]*frame[j,rangeR])/(255*numR)
                lane[j] = (laneR[j] + laneL[j])/2
            self.mask_lane[int(H - j),int(lane[j])] = 255
        
        self.flag.lane_err = ((self.np.mean(lane)*2/W) - 1)  # Several return strategies are possible; here the mean lane position is used, normalized to [-1, 1]
        
        
    def _setCoral(self, modeldir="Model"): # By github.com/EdjeElectronics & coral.ai
        CWD_PATH =self.os.getcwd()
        MODEL_NAME = modeldir
        GRAPH_NAME = "edgetpu.tflite" #If you don't have coral, "detect.tflite"
        LABELMAP_NAME = "labelmap.txt"
        # Paths to the .tflite model and the label map
        PATH_TO_CKPT = self.os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
        PATH_TO_LABELS = self.os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)
        
        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.coral_labels = [line.strip() for line in f.readlines()]
        
        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.coral_labels[0] == '???':
            del(self.coral_labels[0])
        
        # Load the Tensorflow Lite model.
        # If using Edge TPU, use special load_delegate argument
        self.coral_interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                  experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
                                  
        self.coral_interpreter.allocate_tensors()
        
        # Get model details
        self.coral_input_details = self.coral_interpreter.get_input_details()
        self.coral_output_details = self.coral_interpreter.get_output_details()
        self.coral_height = self.coral_input_details[0]['shape'][1]
        self.coral_width = self.coral_input_details[0]['shape'][2]

        self.coral_input_mean = 127.5
        self.coral_input_std = 127.5
        
        
        
    def detectingStopline(self): # By github.com/EdjeElectronics & coral.ai
        
        image = self.mask_k.copy()
        imH,imW = image.shape[0:2]
        
        # Prepare the grayscale ROI image for the model
        coral_frame = self.cv2.cvtColor(image, self.cv2.COLOR_GRAY2RGB)
        frame_rgb = self.cv2.cvtColor(coral_frame, self.cv2.COLOR_BGR2RGB)
        frame_resized = self.cv2.resize(frame_rgb, (self.coral_width, self.coral_height))
        input_data = self.np.expand_dims(frame_resized, axis=0)


        # Perform the actual detection by running the model with the image as input
        self.coral_interpreter.set_tensor(self.coral_input_details[0]['index'],input_data)
        self.coral_interpreter.invoke()

        # Retrieve detection results
        self.coral_boxes = self.coral_interpreter.get_tensor(self.coral_output_details[0]['index'])[0] # Bounding box coordinates of detected objects
        self.coral_classes = self.coral_interpreter.get_tensor(self.coral_output_details[1]['index'])[0] # Class index of detected objects
        self.coral_scores = self.coral_interpreter.get_tensor(self.coral_output_details[2]['index'])[0] # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)
        
        # Threshold
        self.coral_min_conf_threshold = 0.90
        
        self.flag.stopline = False
        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(self.coral_scores)):
            if ((self.coral_scores[i] > self.coral_min_conf_threshold) and (self.coral_scores[i] <= 1.0)):
                if(self.coral_labels[int(self.coral_classes[i])] == "stopline"): self.flag.stopline = True
                
                
                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1,(self.coral_boxes[i][0] * imH)) + self.ROI_far_rngH.start)
                xmin = int(max(1,(self.coral_boxes[i][1] * imW)) + self.ROI_rngW.start)
                ymax = int(min(imH,(self.coral_boxes[i][2] * imH)) + self.ROI_far_rngH.start)
                xmax = int(min(imW,(self.coral_boxes[i][3] * imW)) + self.ROI_rngW.start)
                
                self.cv2.rectangle(self.result, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
    
                # Draw label
                object_name = self.coral_labels[int(self.coral_classes[i])] # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(self.coral_scores[i]*100)) # Example: 'person: 72%'
                labelSize, baseLine = self.cv2.getTextSize(label, self.cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
                label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
                self.cv2.rectangle(self.result, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), self.cv2.FILLED) # Draw white box to put label text in
                self.cv2.putText(self.result, label, (xmin, label_ymin-7), self.cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
        
    # Thread   
    def task(self):
        while(1):
            t1 = self.cv2.getTickCount()
            self.processing()
            self.laneDetect()
            self.detectingSchoolzone()
            self.flag.schoolzone = False #tmp
            
            
            if(self.mode == 1):
                self.detectingStopline()
            
            
            self.board[160:640,:,:] = self.result.copy()
            self.board[0:self.board_size,:,:] = self.icon_blank.copy()


            # Now, Mode Selector
            if(self.mode == 0):
                if(self.flag.schoolzone == True):
                    self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                    self.mode = 1
       
            elif(self.mode == 1):
                self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                if(self.flag.stopline == True):
                    self.board[0:self.board_size,0:self.board_size,:] = self.icon_stopline.copy()
                    self.mode = 2
                    self.flag.stop = True
                    
                                                                                
            elif(self.mode == 2):
                self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                self.board[0:self.board_size,0:self.board_size,:] = self.icon_stopline.copy()
                
                if(self.flag.depart == True):
                    self.flag.depart = False
                    self.flag.powerHandle = True
                    self.mode = 3
                    
            elif(self.mode == 3):
                self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                
                if(self.flag.refresh == True):
                    self.flag.refresh = False
                    self.flag.lidar = True
                    self.mode = 4
                    
            elif(self.mode == 4):
                self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                if(self.flag.blindspot == True):
                    self.mode = 5
                    self.flag.slow = True
                    
            elif(self.mode == 5):
                self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                self.board[0:self.board_size,self.board_size:(self.board_size*2),:] = self.icon_blindspot.copy()
                if(self.flag.blindspot == False):
                    self.mode = 6
                    self.flag.slow = False
                    
            elif(self.mode == 6):
                self.board[0:self.board_size,:,:] = self.icon_schoolzone.copy()
                if(self.flag.schoolzone == False):
                    self.board[0:self.board_size,:,:] = self.icon_blank.copy()
                    self.mode = 7
                    
               
            self.cv2.imshow('Dashboard', self.board)
            t2 = self.cv2.getTickCount()
            freq = self.cv2.getTickFrequency()
            #print(freq/(t2-t1))

            self.cv2.waitKey(1)
            
            if(self.mode == 7):
                self.flag.end = True
                break
        self.cv2.destroyAllWindows()

        
    def start(self):
        self.thread = self.Thread(target=self.task)
        self.thread.start()
    def startLane(self):
        self.threadLane = self.Thread(target=self.taskLane)
        self.threadLane.start()
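
A minimal sketch of driving ImageProcessing; the Flags class below is a hypothetical stand-in exposing the attributes the class reads and writes (schoolzone, stopline, lane_err, and the mode-transition flags).

class Flags:
    # Hypothetical flag container with the attributes ImageProcessing touches
    schoolzone = False
    stopline = False
    stop = False
    depart = False
    powerHandle = False
    refresh = False
    lidar = False
    blindspot = False
    slow = False
    end = False
    lane_err = 0.0

flags = Flags()
ip = ImageProcessing(resolution=(480, 640), flag=flags)  # resolution is (height, width) per __init__
ip.start()  # runs task() in a background thread until mode 7 sets flags.end
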
Ejemplo n.º 27
0
class ObjectDetectorLite:
    def __init__(self, model_path, label_path):
        """
            Builds Tensorflow graph, load model and labels
        """

        # Load label_map
        self._load_label(label_path)

        # Define lite graph and Load Tensorflow Lite model into memory
        self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get input size
        input_shape = self.input_details[0]['shape']
        self.size = input_shape[:2] if len(
            input_shape) == 3 else input_shape[1:3]

    def get_input_size(self):
        return self.size

    def detect(self, image, threshold=0.1):
        """
            Predicts person in frame with threshold level of confidence
            Returns list with top-left, bottom-right coordinates and list with labels, confidence in %
        """
        # Add a batch dimension
        frame = np.expand_dims(image, axis=0)

        # run model
        self.interpreter.set_tensor(self.input_details[0]['index'], frame)
        self.interpreter.invoke()

        # get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])

        # Find detected boxes coordinates
        return self._boxes_coordinates(image,
                                       np.squeeze(boxes[0]),
                                       np.squeeze(classes[0] + 1).astype(
                                           np.int32),
                                       np.squeeze(scores[0]),
                                       min_score_thresh=threshold)

    def close(self):
        pass

    def _boxes_coordinates(self,
                           image,
                           boxes,
                           classes,
                           scores,
                           max_boxes_to_draw=20,
                           min_score_thresh=.5):
        """
        This function filters detections by score and collects the box coordinates,
        probabilities, and category names for each remaining detection.

        Args:
          image: uint8 numpy array with shape (img_height, img_width, 3)
          boxes: a numpy array of shape [N, 4]
          classes: a numpy array of shape [N]
          scores: a numpy array of shape [N] or None.  If scores=None, then
            this function assumes that the boxes to be plotted are groundtruth
            boxes and plot all boxes as black with no classes or scores.
          max_boxes_to_draw: maximum number of boxes to visualize.  If None, draw
            all boxes.
          min_score_thresh: minimum score threshold for a box to be visualized
        """

        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        number_boxes = min(max_boxes_to_draw, boxes.shape[0])
        detected_boxes = []
        probabilities = []
        categories = []

        for i in range(number_boxes):
            if scores is None or scores[i] > min_score_thresh:
                box = tuple(boxes[i].tolist())
                detected_boxes.append(box)
                probabilities.append(scores[i])
                categories.append(self.category_index[classes[i]])
        return np.array(detected_boxes), probabilities, categories

    def _load_label(self, path):
        """
            Loads labels
        """
        categories = load_labelmap(path)
        self.category_index = create_category_index(categories)
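
A minimal usage sketch for ObjectDetectorLite; the model path, label-map path, and image file are placeholders, and load_labelmap/create_category_index are assumed to be importable as in the class above.

import cv2

detector = ObjectDetectorLite(model_path='detect.tflite', label_path='labelmap.pbtxt')  # placeholder paths
h, w = detector.get_input_size()
image = cv2.imread('test1.jpg')                  # placeholder image file
resized = cv2.resize(image, (int(w), int(h)))
boxes, probs, cats = detector.detect(resized, threshold=0.5)
print(len(boxes), 'detections above threshold')
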
        ret, color_image = cap.read()
        if not ret:
            break

        colw = color_image.shape[1]
        colh = color_image.shape[0]
        new_w = int(colw * min(w/colw, h/colh))
        new_h = int(colh * min(w/colw, h/colh))

        resized_image = cv2.resize(color_image, (new_w, new_h), interpolation = cv2.INTER_CUBIC)
        canvas = np.full((h, w, 3), 128)
        canvas[(h - new_h)//2:(h - new_h)//2 + new_h,(w - new_w)//2:(w - new_w)//2 + new_w, :] = resized_image

        prepimg = canvas.astype(np.float32)
        prepimg = prepimg[np.newaxis, :, :, :]     # Batch size axis add
        interpreter.set_tensor(input_details[0]['index'], prepimg)
        interpreter.invoke()
        outputs = interpreter.get_tensor(output_details[0]['index']) #(1, 32, 32, 57)
        outputs = outputs.transpose((0, 3, 1, 2))  # NHWC to NCHW, (1, 57, 32, 32)

        detected_keypoints = []
        keypoints_list = np.zeros((0, 3))
        keypoint_id = 0

        for part in range(nPoints):
            probMap = outputs[0, part, :, :]
            probMap = cv2.resize(probMap, (canvas.shape[1], canvas.shape[0])) # (256, 256)
            keypoints = getKeypoints(probMap, threshold)
            keypoints_with_id = []

            for i in range(len(keypoints)):
Ejemplo n.º 29
0
while True:

    t1 = cv2.getTickCount()

    frame1 = videostream.read()

    frame = frame1.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
    classes = interpreter.get_tensor(output_details[1]['index'])[0]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]
    #num = interpreter.get_tensor(output_details[3]['index'])[0]

    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
Ejemplo n.º 30
0
class WakeWord2:
    def __init__(self):
        # Sliding window
        self.window = np.zeros(int(RECORD_DURATION * RESAMPLE_RATE) * 2)

        # Load model
        self.interpreter = Interpreter(WAKEWORD_MODEL_PATH)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Thread and flags
        self.ON = True
        self.running = True
        self.wakeword_flag = False
        self.wakeword_thread = threading.Thread(target=self.wakeword_run,
                                                name="wakeword_thread")
        self.wakeword_thread.start()
        print('WakeWord Initialized')

    def close(self):
        self.running = False
        self.wakeword_thread.join()
        return

    # Background loop that continuously checks for wake words
    def wakeword_run(self):
        with sd.InputStream(channels=NUM_CHANNELS,
                            samplerate=SAMPLE_RATE,
                            blocksize=int(SAMPLE_RATE * RECORD_DURATION),
                            callback=self.wakeword_process):
            while self.running:
                pass

    def __call__(self):
        if self.wakeword_flag:
            self.wakeword_flag = False
            return True
        return False

    def wakeword_process(self, rec, frames, time, error):
        # Start timing for testing
        start_time = timeit.default_timer()
        # Notify if errors
        if error:
            print("Error: ", error)

        # Remove 2nd dimension from recording sample and downsample
        rec = np.squeeze(rec)
        rec = scipy.signal.decimate(rec, DOWNSAMPLE)

        # Analyze a sliding window of the sound that overlaps the last window by 50%
        # to catch wake words that might span time segments
        self.window[:len(self.window) // 2] = self.window[len(self.window) //
                                                          2:]
        self.window[len(self.window) // 2:] = rec

        # Compute MFCCs (Mel-Frequency Cepstral Coefficients), which scale the frequency axis
        # to match more closely what the human ear can hear
        mfccs = python_speech_features.base.mfcc(self.window,
                                                 samplerate=RESAMPLE_RATE,
                                                 winlen=0.256,
                                                 winstep=0.050,
                                                 numcep=NUM_MFCC,
                                                 nfilt=26,
                                                 nfft=2048,
                                                 preemph=0.0,
                                                 ceplifter=0,
                                                 appendEnergy=False,
                                                 winfunc=np.hanning)
        mfccs = mfccs.transpose()

        # Make prediction from model
        in_tensor = np.float32(
            mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1))
        self.interpreter.set_tensor(self.input_details[0]['index'], in_tensor)
        self.interpreter.invoke()
        output_data = self.interpreter.get_tensor(
            self.output_details[0]['index'])
        val = output_data[0][0]
        # test for the wake word ('go')
        if val > WORD_THRESHOLD:
            print('listening')
            self.wakeword_flag = True
        if DEBUG_ACC:  # print accuracy of each detection
            print(val)
        if DEBUG_TIME:  # print processing time for a sound clip
            print(timeit.default_timer() - start_time)
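
A minimal sketch of polling WakeWord2 from a main loop; the constants it relies on (RECORD_DURATION, RESAMPLE_RATE, SAMPLE_RATE, NUM_CHANNELS, DOWNSAMPLE, NUM_MFCC, WORD_THRESHOLD, WAKEWORD_MODEL_PATH, DEBUG_ACC, DEBUG_TIME) must already be defined, as they are in the original module.

import time

wakeword = WakeWord2()
try:
    while True:
        if wakeword():       # __call__ returns True once per detected wake word
            print('Wake word detected - start listening for a command')
        time.sleep(0.05)     # avoid a tight busy loop in the main thread
except KeyboardInterrupt:
    wakeword.close()         # stop the background wakeword thread
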