Example #1
 def run_inference(self, input_data):
     """Run inference using the zero copy feature from pycoral and returns inference time in ms.
     """
     start = time.monotonic()
     edgetpu.run_inference(self._interpreter, input_data)
     self._inf_time = time.monotonic() - start
     return (self._inf_time * 1000)
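This wrapper assumes an interpreter that was created and allocated beforehand with pycoral's standard helpers. A minimal setup sketch, with a placeholder model path and a dummy input buffer (both illustrative, not taken from the example above):

import numpy as np
from pycoral.adapters import common
from pycoral.utils import edgetpu

# Placeholder model path; any Edge TPU compiled .tflite model works here.
interpreter = edgetpu.make_interpreter('model_edgetpu.tflite')
interpreter.allocate_tensors()

# Dummy uint8 buffer matching the model's input size; real code passes
# resized camera frames or image bytes instead.
width, height = common.input_size(interpreter)
dummy = np.zeros((height, width, 3), dtype=np.uint8)
edgetpu.run_inference(interpreter, dummy.flatten())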
Example #2
 def _run_inference_with_gst(self, interpreter, input_data):
     output_index = interpreter.get_output_details()[0]['index']
     bytes_input = bytes(input_data)
     gst_input = Gst.Buffer.new_wrapped(bytes_input)
     edgetpu.run_inference(interpreter, gst_input)
     ret = interpreter.tensor(output_index)()
     return np.copy(ret)
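Example #2 relies on GStreamer's Python bindings, which the snippet does not import. A typical import-and-init sequence (a sketch, assuming PyGObject with GStreamer 1.0 is installed):

import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst

# GStreamer must be initialized before Gst.Buffer objects can be created.
Gst.init(None)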
Example #3
    def authorized_get(self):
        if self.path == '/':
            self.send_response(301)
            self.send_header('Location', '/index.html')
            self.end_headers()
        elif self.path == '/index.html':
            content = PAGE.encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'text/html')
            self.send_header('Content-Length', len(content))
            self.end_headers()
            self.wfile.write(content)
        elif self.path == '/stream.mjpg':
            self.send_response(200)
            self.send_header('Age', 0)
            self.send_header('Cache-Control', 'no-cache, private')
            self.send_header('Pragma', 'no-cache')
            self.send_header('Content-Type', 'multipart/x-mixed-replace; boundary=FRAME')
            self.end_headers()
            try:
                stream_video = io.BytesIO()
                fps.start()
                while True:
                    # grab the latest frame from the camera
                    frame = camera.read()
                    cv2_im = frame

                    # convert BGR to RGB and resize to the model's input size
                    cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
                    cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
                    if VFLIP:
                        cv2_im_rgb = cv2.flip(cv2_im_rgb, 0)
                    if HFLIP:
                        cv2_im_rgb = cv2.flip(cv2_im_rgb, 1)

                    # object detection
                    run_inference(interpreter, cv2_im_rgb.tobytes())
                    objs = get_objects(interpreter, args.threshold)[:args.top_k]

                    cv2_im = self.append_objs_to_img(cv2_im, inference_size, objs, labels)
                    r, buf = cv2.imencode(".jpg", cv2_im)

                    self.wfile.write(b'--FRAME\r\n')
                    self.send_header('Content-type','image/jpeg')
                    self.send_header('Content-length',str(len(buf)))
                    self.end_headers()
                    self.wfile.write(bytearray(buf))
                    self.wfile.write(b'\r\n')
                    fps.update()

            except Exception as e:
                logging.warning(
                    'Removed streaming client %s: %s',
                    self.client_address, str(e))

        else:
            self.send_error(404)
            self.end_headers()
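The handler above is a method of an http.server request handler; the example does not show how it is served. A minimal sketch of the usual wiring, assuming the surrounding class (called StreamingHandler here, an assumed name) extends BaseHTTPRequestHandler and routes GET requests to authorized_get:

from http.server import ThreadingHTTPServer

# StreamingHandler is assumed to be the BaseHTTPRequestHandler subclass whose
# do_GET (after any authorization check) calls authorized_get above.
server = ThreadingHTTPServer(('', 8000), StreamingHandler)
server.serve_forever()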
Example #4
    def run_inference(self, input):
        start_time = time.monotonic()
        run_inference(self._interpreter, input)
        duration_ms = (time.monotonic() - start_time) * 1000

        output = []
        for details in self._interpreter.get_output_details():
            tensor = self._interpreter.get_tensor(details['index'])
            output.append(tensor)

        return (duration_ms, output)
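Illustrative use of the (latency, outputs) pair this method returns; `engine` and the zero-filled SSD-sized buffer are placeholders for whatever class hosts the method and its real input data:

import numpy as np

duration_ms, outputs = engine.run_inference(
    np.zeros(300 * 300 * 3, dtype=np.uint8))  # dummy input, size is assumed
print('Inference: {:.2f} ms, {} output tensor(s)'.format(
    duration_ms, len(outputs)))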
Example #5
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
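Several of these examples call an append_objs_to_img helper that is not shown. The sketch below (an assumption based on the Coral OpenCV examples this code mirrors, with cv2 imported as in the examples) scales each box from inference_size back to the frame and draws it; the real helper used above may differ in details:

def append_objs_to_img(cv2_im, inference_size, objs, labels):
    height, width, channels = cv2_im.shape
    # Map boxes from the model's input resolution back to the full frame.
    scale_x, scale_y = width / inference_size[0], height / inference_size[1]
    for obj in objs:
        bbox = obj.bbox.scale(scale_x, scale_y)
        x0, y0 = int(bbox.xmin), int(bbox.ymin)
        x1, y1 = int(bbox.xmax), int(bbox.ymax)
        percent = int(100 * obj.score)
        label = '{}% {}'.format(percent, labels.get(obj.id, obj.id))
        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2_im = cv2.putText(cv2_im, label, (x0, y0 + 30),
                             cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
    return cv2_im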
Example #6
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()

    pygame.init()
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (640, 480))
    inference_size = input_size(interpreter)
    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            imagen = camera.get_image()
            imagen = pygame.transform.scale(imagen, inference_size)
            start_time = time.monotonic()
            run_inference(interpreter, imagen.get_buffer().raw)
            results = get_classes(interpreter, top_k=3, score_threshold=0)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps)
            for result in results:
                annotate_text += '\n{:.0f}% {}'.format(100 * result[1],
                                                       labels[result[0]])
            print(annotate_text)
    finally:
        camera.stop()
Example #7
    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        run_inference(interpreter, input_tensor)

        results = get_classes(interpreter, args.top_k, args.threshold)
        end_time = time.monotonic()
        text_lines = [
            ' ',
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        for result in results:
            text_lines.append('score={:.2f}: {}'.format(
                result.score, labels.get(result.id, result.id)))
        print(' '.join(text_lines))
        return generate_svg(src_size, text_lines)
Example #8
def capture_v(args):
    global outputFrame, lock

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)

    cap = cv2.VideoCapture(args.camera_idx)
    # cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    # Sony PS3 EYE cam settings:
    # 320x240 @ 125 FPS, 640x480 @ 60 FPS, 320x240 @ 187 FPS --> use the exact FPS setting
    cap.set(cv2.CAP_PROP_FPS, 60)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print("image size=", size)

    fps = 0
    start_time = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
        run_inference(interpreter, cv2_im_rgb.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        cv2_im = append_objs_to_img(cv2_im, inference_size, objs, labels)
        with lock:
            outputFrame = cv2_im
        fps += 1
        if fps == 200:
            end_time = time.time()
            print("cam FPS:", fps / (end_time - start_time))
            start_time = time.time()
            fps = 0

    cap.release()
Example #9
    def run(self):
        print('Loading {} with {} labels.'.format(self.model, self.labels))
        interpreter = make_interpreter(self.model)
        interpreter.allocate_tensors()
        readLabels = read_label_file(self.labels)
        inference_size = input_size(interpreter)

        while True:
            # Capture frame-by-frame
            frameWebcam = self.webcam.read()
            frameWebcam = imutils.resize(frameWebcam, width=800)
            framePicam = self.picam.read()
            framePicam = imutils.resize(framePicam, width=600)
            # If the Coral is not to be used, uncomment this line and comment out the next block
            #grayWebcam = cv2.cvtColor(frameWebcam, cv2.COLOR_BGR2GRAY)
            # Grab the image and then process it in the Coral interpreter
            cv2_im_rgb = cv2.cvtColor(frameWebcam, cv2.COLOR_BGR2RGB)
            cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)
            run_inference(interpreter, cv2_im_rgb.tobytes())
            objs = get_objects(interpreter, self.threshold)[:self.top_k]
            cv2_im = self.append_objs_to_img(
                frameWebcam, inference_size, objs, readLabels)
            # Write the video to a file
            self.out.write(cv2_im)
            # Start the Picam processing
            grayPicam = cv2.cvtColor(framePicam, cv2.COLOR_BGR2GRAY)
            grayPicam = cv2.rotate(grayPicam, cv2.ROTATE_180)
            #grayPicam = cv2.GaussianBlur(grayPicam, (21, 21), 0)
            # Display the resulting frame
            cv2.imshow("RobotBack", grayPicam)
            cv2.imshow("RobotFront", cv2_im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # When everything done, release the capture
        self.out.release()
        cv2.destroyAllWindows()
        self.webcam.stop()
        self.picam.stop()
Example #10
def detect_and_classify_faces(detector,
                              classifier,
                              image,
                              threshold,
                              padding=10):
    predictions = []
    boxes = []
    faces = []
    height, width, _ = image.shape
    detector_target_size = common.input_size(detector)
    classifier_target_size = common.input_size(classifier)

    scale_x, scale_y = width / detector_target_size[
        0], height / detector_target_size[1]
    resized_image = cv2.resize(image, detector_target_size)
    run_inference(detector, resized_image.tobytes())
    objects = detect.get_objects(detector, threshold)

    for obj in objects:
        bbox = obj.bbox.scale(scale_x, scale_y)
        startX, startY = int(bbox.xmin - padding), int(bbox.ymin - padding)
        endX, endY = int(bbox.xmax + padding), int(bbox.ymax + padding)

        # ensure the bounding boxes fall within the dimensions of the image
        (startX, startY) = (max(1, startX), max(1, startY))
        (endX, endY) = (min(width - 1, endX), min(height - 1, endY))
        boxes.append((startX, startY, endX, endY))

        face = image[startY:endY, startX:endX]
        face = cv2.resize(face, classifier_target_size)
        faces.append(face)

    for face in faces:
        run_inference(classifier, face.tobytes())
        prediction = classify.get_scores(classifier)
        predictions.append(prediction)

    return (boxes, predictions)
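Illustrative wiring for detect_and_classify_faces; the model file names and the input image path are placeholders, not part of the example:

import cv2
from pycoral.utils.edgetpu import make_interpreter

detector = make_interpreter('face_detector_edgetpu.tflite')      # placeholder
detector.allocate_tensors()
classifier = make_interpreter('mask_classifier_edgetpu.tflite')  # placeholder
classifier.allocate_tensors()

image = cv2.imread('input.jpg')  # placeholder image path
boxes, predictions = detect_and_classify_faces(
    detector, classifier, image, threshold=0.5)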
Example #11
    def _run_inference_with_different_input_types(self, interpreter,
                                                  input_data):
        """Tests inference with different input types.

    It doesn't check correctness of inference. Instead it checks inference
    repeatability with different input types.

    Args:
      interpreter : A tflite interpreter.
      input_data (list): A 1-D list as the input tensor.
    """
        output_index = interpreter.get_output_details()[0]['index']
        # numpy array
        np_input = np.asarray(input_data, np.uint8)
        edgetpu.run_inference(interpreter, np_input)
        ret = interpreter.tensor(output_index)()
        ret0 = np.copy(ret)
        self.assertTrue(np.array_equal(ret0, ret))
        # bytes
        bytes_input = bytes(input_data)
        edgetpu.run_inference(interpreter, bytes_input)
        ret = interpreter.tensor(output_index)()
        self.assertTrue(np.array_equal(ret0, ret))
        # ctypes
        edgetpu.run_inference(
            interpreter,
            (np_input.ctypes.data_as(ctypes.c_void_p), np_input.size))
        ret = interpreter.tensor(output_index)()
        self.assertTrue(np.array_equal(ret0, ret))
        # Gst buffer
        if _libgst:
            gst_input = Gst.Buffer.new_wrapped(bytes_input)
            edgetpu.run_inference(interpreter, gst_input)
            ret = interpreter.tensor(output_index)()
            self.assertTrue(np.array_equal(ret0, ret))
        else:
            print('Can not import gi. Skip test on Gst.Buffer input type.')
Example #12
def main():
    global mot_tracker
    default_model_dir = '../models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default = 0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--tracker', help='Name of the Object Tracker To be used.',
                        default=None,
                        choices=[None, 'sort'])
    parser.add_argument('--videosrc', help='Directly connected (dev) or Networked (net) video source. ', choices=['dev','net','file'],
                        default='dev')
    parser.add_argument('--display', help='Is a display attached',
                        default='False',
                        choices=['True', 'False'])
    parser.add_argument('--netsrc', help="Networked video source, example format: rtsp://192.168.1.43/mpeg4/media.amp",)
    parser.add_argument('--filesrc', help="Video file source. The videos subdirectory gets mapped into the Docker container, so place your files there.",)
    parser.add_argument('--modelInt8', help="Model expects input tensors to be Int8, not UInt8", default='False', choices=['True', 'False'])
    
    args = parser.parse_args()
    
    trackerName = args.tracker
    # Check for the object tracker.
    if trackerName is not None:
        if trackerName == 'mediapipe':
            if detectCoralDevBoard():
                objectOfTracker = ObjectTracker('mediapipe')
            else:
                print("Tracker MediaPipe is only available on the Dev Board. Keeping the tracker as None")
                trackerName = None
        else:
            objectOfTracker = ObjectTracker(trackerName)
    else:
        pass
    
    if trackerName is not None and objectOfTracker:
        mot_tracker = objectOfTracker.trackerObject.mot_tracker
    else:
        mot_tracker = None
    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)
    model_int8 = (args.modelInt8 == 'True')

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    elif args.videosrc == 'file':
        cap = cv2.VideoCapture(args.filesrc)
    else:
        if args.netsrc is None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)
        
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret: 
            if args.videosrc=='file':
                cap = cv2.VideoCapture(args.filesrc)
                continue  
            else:
                break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)

        if model_int8:
            im_pil = Image.fromarray(cv2_im_rgb)
            input_type = common.input_details(interpreter, 'dtype')
            img = (input_type(cv2_im_rgb) - 127.5) / 128.0

            run_inference(interpreter, img.flatten())
        else:
            run_inference(interpreter, cv2_im_rgb.tobytes())

        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        height, width, channels = cv2_im.shape
        scale_x, scale_y = width / inference_size[0], height / inference_size[1]
        detections = []  # np.array([])
        for obj in objs:
            bbox = obj.bbox.scale(scale_x, scale_y)
            element = []  # np.array([])
            element.append(bbox.xmin)
            element.append(bbox.ymin)
            element.append(bbox.xmax)
            element.append(bbox.ymax)
            element.append(obj.score)  # print('element= ',element)
            element.append(obj.id)
            detections.append(element)  # print('dets: ',dets)
        # convert to numpy array #      print('npdets: ',dets)
        detections = np.array(detections)
        trdata = []
        trackerFlag = False
        if detections.any():
            if mot_tracker is not None:
                trdata = mot_tracker.update(detections)
                trackerFlag = True

        cv2_im = append_objs_to_img(cv2_im,  detections, labels, trdata, trackerFlag)
        
        if args.display == 'True':
            cv2.imshow('frame', cv2_im)
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #13
def main():
    global mot_tracker
    global mqtt_bridge
    global mqtt_topic

    camera_width = 1280
    camera_height = 720

    default_model_dir = '../models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--tracker',
                        help='Name of the Object Tracker To be used.',
                        default=None,
                        choices=[None, 'sort'])
    parser.add_argument(
        '--videosrc',
        help='Directly connected (dev) or Networked (net) video source. ',
        choices=['dev', 'net', 'file'],
        default='dev')
    parser.add_argument('--display',
                        help='Is a display attached',
                        default='False',
                        choices=['True', 'False'])
    parser.add_argument(
        '--netsrc',
        help=
        "Networked video source, example format: rtsp://192.168.1.43/mpeg4/media.amp",
    )
    parser.add_argument(
        '--filesrc',
        help=
        "Video file source. The videos subdirectory gets mapped into the Docker container, so place your files there.",
    )
    parser.add_argument(
        '--modelInt8',
        help="Model expects input tensors to be Int8, not UInt8",
        default='False',
        choices=['True', 'False'])
    parser.add_argument('--mqtt-host',
                        help="MQTT broker hostname",
                        default='127.0.0.1')
    parser.add_argument('--mqtt-port',
                        type=int,
                        help="MQTT broker port number (default 1883)",
                        default=1883)
    parser.add_argument('--mqtt-topic',
                        dest='mqtt_topic',
                        help="MQTT Object Tracking topic",
                        default="skyscan/object/json")

    args = parser.parse_args()

    trackerName = args.tracker
    # Check for the object tracker.
    if trackerName is not None:
        if trackerName == 'mediapipe':
            if detectCoralDevBoard():
                objectOfTracker = ObjectTracker('mediapipe')
            else:
                print(
                    "Tracker MediaPipe is only available on the Dev Board. Keeping the tracker as None"
                )
                trackerName = None
        else:
            objectOfTracker = ObjectTracker(trackerName)
    else:
        pass

    if trackerName is not None and objectOfTracker:
        mot_tracker = objectOfTracker.trackerObject.mot_tracker
    else:
        mot_tracker = None
    mqtt_topic = args.mqtt_topic
    mqtt_bridge = mqtt_wrapper.bridge(host=args.mqtt_host,
                                      port=args.mqtt_port,
                                      client_id="skyscan-object-tracker-%s" %
                                      (ID))
    mqtt_bridge.publish("skyscan/registration",
                        "skyscan-adsb-mqtt-" + ID + " Registration", 0, False)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)
    inference_size = input_size(interpreter)
    model_int8 = (args.modelInt8 == 'True')

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    elif args.videosrc == 'file':
        cap = cv2.VideoCapture(args.filesrc)
    else:
        if args.netsrc is None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)

    cap.set(cv2.CAP_PROP_BUFFERSIZE, 0)
    timeHeartbeat = 0
    while cap.isOpened():
        if timeHeartbeat < time.mktime(time.gmtime()):
            timeHeartbeat = time.mktime(time.gmtime()) + 10
            mqtt_bridge.publish("skyscan/heartbeat",
                                "skyscan-object-tracker-" + ID + " Heartbeat",
                                0, False)
        start_time = time.monotonic()
        ret, frame = cap.read()
        if not ret:
            if args.videosrc == 'file':
                cap = cv2.VideoCapture(args.filesrc)
                continue
            else:
                break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        cv2_im_rgb = cv2.resize(cv2_im_rgb, inference_size)

        if model_int8:
            im_pil = Image.fromarray(cv2_im_rgb)
            input_type = common.input_details(interpreter, 'dtype')
            img = (input_type(cv2_im_rgb) - 127.5) / 128.0

            run_inference(interpreter, img.flatten())
        else:
            run_inference(interpreter, cv2_im_rgb.tobytes())

        objs = get_objects(interpreter, args.threshold)[:args.top_k]
        height, width, channels = cv2_im.shape
        scale_x, scale_y = width / inference_size[0], height / inference_size[1]
        detections = []  # np.array([])
        for obj in objs:
            bbox = obj.bbox.scale(scale_x, scale_y)
            element = []  # np.array([])
            element.append(bbox.xmin)
            element.append(bbox.ymin)
            element.append(bbox.xmax)
            element.append(bbox.ymax)
            element.append(obj.score)  # print('element= ',element)
            element.append(obj.id)
            detections.append(element)  # print('dets: ',dets)
        # convert to numpy array #      print('npdets: ',dets)
        detections = np.array(detections)
        trdata = []
        trackerFlag = False
        if detections.any():
            if mot_tracker is not None:
                trdata = mot_tracker.update(detections)
                trackerFlag = True

        cv2_im = append_objs_to_img(cv2_im, detections, labels, trdata,
                                    trackerFlag)
        follow_x, follow_y = object_to_follow(detections, labels, trdata,
                                              trackerFlag)
        if args.display == 'True':
            cv2.imshow('frame', cv2_im)

        if follow_x is not None:
            follow_x = int(follow_x * (camera_height / height))
            follow_y = int(follow_y * (camera_width / width))
            coordinates = motionControl(follow_x, follow_y)
            follow = {"x": coordinates[0], "y": coordinates[1]}
            follow_json = json.dumps(follow)
            end_time = time.monotonic()
            print("x: {} y:{} new_x: {} new_y: {} Inference: {:.2f} ms".format(
                follow_x, follow_y, coordinates[0], coordinates[1],
                (end_time - start_time) * 1000))
            mqtt_bridge.publish(mqtt_topic, follow_json, 0, False)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example #14
 def detect(self, cv2_im_rgb):
     run_inference(self.interpreter, cv2_im_rgb.tobytes())
     objs = get_objects(self.interpreter, self.threshold)[:self.max_faces]
     return objs
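A possible call site for the detect method above; `face_detector` stands for whatever object defines it, and the capture index is arbitrary:

cap = cv2.VideoCapture(0)
ret, frame = cap.read()
if ret:
    # Convert and resize the frame to the detector's expected input.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rgb = cv2.resize(rgb, input_size(face_detector.interpreter))
    objs = face_detector.detect(rgb)
    print('{} face(s) detected'.format(len(objs)))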
Example #15
        packed_msg_size = data[:payload_size]
        data = data[payload_size:]
        msg_size = struct.unpack(">L", packed_msg_size)[0]

        while len(data) < msg_size:
            data += conn.recv(4096)

        frame_data = data[:msg_size]
        data = data[msg_size:]

        frame = pickle.loads(frame_data, fix_imports=True, encoding="bytes")
        # cv2.imdecode takes an imread flag here, not a color-conversion code
        frame = cv2.imdecode(frame, cv2.IMREAD_COLOR)
        height, width, channels = frame.shape

        frame = cv2.resize(frame, inference_size)
        run_inference(interpreter, frame.tobytes())
        objs = get_objects(interpreter, args.threshold)[:args.top_k]

        scale_x, scale_y = width / inference_size[0], height / inference_size[1]

        ret_array = []
        for obj in objs:
            ret = {}

            bbox = obj.bbox.scale(scale_x, scale_y)
            x0, y0 = int(bbox.xmin), int(bbox.ymin)
            x1, y1 = int(bbox.xmax), int(bbox.ymax)

            percent = int(100 * obj.score)
            ret["label"] = labels.get(obj.id, obj.id)
            ret["percent"] = percent
Example #16
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=5,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='classifier score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.labels)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont('Arial', 20)

    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    inference_size = input_size(interpreter)

    camera = None
    for cam in camlist:
        try:
            camera = pygame.camera.Camera(cam, (cam_w, cam_h))
            camera.start()
            print(str(cam) + ' opened')
            break
        except SystemError as e:
            print('Failed to open {}: {}'.format(str(cam), str(e)))
            camera = None
    if not camera:
        sys.stderr.write("\nERROR: Unable to open a camera.\n")
        sys.exit(1)

    try:
        display = pygame.display.set_mode((cam_w, cam_h), 0)
    except pygame.error as e:
        sys.stderr.write(
            "\nERROR: Unable to open a display window. Make sure a monitor is attached and that "
            "the DISPLAY environment variable is set. Example: \n"
            ">export DISPLAY=\":0\" \n")
        raise e

    red = pygame.Color(255, 0, 0)

    scale_x, scale_y = cam_w / inference_size[0], cam_h / inference_size[1]
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, inference_size)
            start_time = time.monotonic()
            run_inference(interpreter, imagen.get_buffer().raw)
            results = get_objects(interpreter, args.threshold)[:args.top_k]
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time) * 1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps_ms)
            for result in results:
                bbox = result.bbox.scale(scale_x, scale_y)
                rect = pygame.Rect(bbox.xmin, bbox.ymin, bbox.width,
                                   bbox.height)
                pygame.draw.rect(mysurface, red, rect, 1)
                label = '{:.0f}% {}'.format(100 * result.score,
                                            labels.get(result.id, result.id))
                text = font.render(label, True, red)
                print(label, ' ', end='')
                mysurface.blit(text, (bbox.xmin, bbox.ymin))
            text = font.render(annotate_text, True, red)
            print(annotate_text)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()