Example no. 1
def detect_objects(threshold=0.1, top_count=3):
    interpreter = common.make_interpreter(default_model)
    interpreter.allocate_tensors()
    labels = load_labels(default_labels)
    cap = cv2.VideoCapture(default_camera_idx)
    
    if cap.isOpened():
        for i in range(0,15):
            ret, frame = cap.read()
            time.sleep(1/1000)
            if not ret:
                break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=threshold, top_k=top_count)

        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        cv2.imshow('detect', cv2_im)
        cv2.waitKey(50)
        
        def make(obj):        
            return Result(
                percent = int(100 * obj.score),
                label = labels.get(obj.id, 'unknown')
            )
        cap.release()
        return [make(obj) for obj in objs]
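Several of these snippets rely on helpers that are never shown, such as the Result record and the load_labels reader used above. A minimal sketch of what they might look like, assuming the usual Coral label-file layout of one "id  description" pair per line; everything below is an assumption, not the original code.

import collections
import re

# Hypothetical helpers assumed by the snippets; not the original implementation.
Result = collections.namedtuple('Result', ['label', 'percent'])

def load_labels(path):
    """Read a label file with lines like '0  person' into an {id: name} dict."""
    labels = {}
    with open(path, 'r', encoding='utf-8') as f:
        for row_number, line in enumerate(f):
            pair = re.split(r'[:\s]+', line.strip(), maxsplit=1)
            if len(pair) == 2 and pair[0].isdigit():
                labels[int(pair[0])] = pair[1].strip()
            else:
                labels[row_number] = line.strip()
    return labels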
Example no. 2
    def __init__(self):
        print('Loading {} with {} labels.'.format(model, labels))
        self.interpreter = common.make_interpreter(model)
        self.interpreter.allocate_tensors()
        self.labels = load_labels(labels)
        self.video = cv2.VideoCapture(0)
        self.file = open("/home/mendel/person_detected.txt", "w")
Example no. 3
def main():
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument(
      '--model', help='File path of .tflite model.', default='inception_v4_299_quant_edgetpu.tflite')
  parser.add_argument(
      '--labels', help='File path of labels file.', default='imagenet_labels.txt')
  parser.add_argument(
      '--top_k', help='Number of classifications to list', type=int, default=1)
  args = parser.parse_args()

  print('Initializing TF Lite interpreter...')
  
  interpreter = common.make_interpreter(os.path.join(default_model_dir,args.model))
  interpreter.allocate_tensors()
  labels = load_labels(os.path.join(default_model_dir, args.labels))
  cap = cv2.VideoCapture(0)
  
  while True:
    ret, frame = cap.read()
    cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)
    common.set_input(interpreter, pil_im)
    
    results = classify_image(interpreter, pil_im, args.top_k)
    for label_id, prob in results:
      cv2.putText(frame, labels[label_id], (5,35), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,0,0), 2)
      print('%s: %.5f' % (labels[label_id], prob))

    cv2.imshow('Classification', frame)
    if cv2.waitKey(1) == ord('q'):
      break

  cap.release()
  cv2.destroyAllWindows()
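classify_image is not defined in the snippet above. A minimal sketch, assuming a standard TensorFlow Lite classification output and that the input tensor has already been filled via common.set_input (as the snippet does); the helper shown here is an assumption, not the original.

import numpy as np

def classify_image(interpreter, image, top_k=1):
    """Return [(label_id, score), ...]; `image` is accepted for signature
    compatibility, the input tensor is assumed to be set already."""
    interpreter.invoke()
    output_details = interpreter.get_output_details()[0]
    scores = np.squeeze(interpreter.get_tensor(output_details['index']))
    if output_details['dtype'] == np.uint8:
        # De-quantize uint8 scores.
        scale, zero_point = output_details['quantization']
        scores = scale * (scores.astype(np.float32) - zero_point)
    top = np.argsort(scores)[-top_k:][::-1]
    return [(int(i), float(scores[i])) for i in top]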
Example no. 4
def detect_objects(args):
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)
    dirname = args.images

    dirpath = Path('results/' + dirname)
    if dirpath.exists() and dirpath.is_dir():
        shutil.rmtree(dirpath)
    Path("results/" + dirname).mkdir(parents=True, exist_ok=True)

    for filename in glob.glob(dirname + "/*.jpeg"):
        print(filename)
        name = os.path.basename(filename)
        pil_im = Image.open(filename)
        open_cv_image = np.array(pil_im)
        snapshot_im = pil_im
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        #print(objs)
        open_cv_image = append_objs_to_img(open_cv_image, objs, labels)
        # The array came from PIL so it is RGB; swap to BGR for cv2.imencode.
        cv2_im_bgr = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
        (flag, encodedImage) = cv2.imencode(".jpeg", cv2_im_bgr)
        #print(flag)
        #print(encodedImage)
        with open("./results/" + dirname + "/" + name, "wb") as f:
            f.write(encodedImage)
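append_objs_to_img is used throughout these examples but never shown. A minimal sketch, assuming obj.bbox holds relative (0..1) coordinates, which is how the other snippets treat it.

import cv2

def append_objs_to_img(cv2_im, objs, labels):
    """Draw each detection box and its '<score>% <label>' caption on the frame."""
    height, width, _ = cv2_im.shape
    for obj in objs:
        x0, y0, x1, y1 = list(obj.bbox)
        x0, y0 = int(x0 * width), int(y0 * height)
        x1, y1 = int(x1 * width), int(y1 * height)
        label = '{}% {}'.format(int(100 * obj.score), labels.get(obj.id, obj.id))
        cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2_im = cv2.putText(cv2_im, label, (x0, y0 + 30),
                             cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 2)
    return cv2_im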
Example no. 5
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        #cv2.imshow('frame', cv2_im)
        #cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        #pil_im = Image.fromarray(cv2_im_rgb)
        #handle_image_conversion(pil_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
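get_output is the other helper every detection snippet assumes. A minimal sketch for an SSD model with the usual postprocess outputs (boxes, class ids, scores, count); the tensor ordering and the Object/BBox shapes are assumptions chosen to match how the snippets read them.

import collections
import numpy as np

Object = collections.namedtuple('Object', ['id', 'score', 'bbox'])
BBox = collections.namedtuple('BBox', ['xmin', 'ymin', 'xmax', 'ymax'])

def get_output(interpreter, score_threshold=0.1, top_k=3):
    """Return up to top_k detections above score_threshold, with relative boxes."""
    def tensor(i):
        details = interpreter.get_output_details()[i]
        return np.squeeze(interpreter.get_tensor(details['index']))
    boxes, class_ids, scores = tensor(0), tensor(1), tensor(2)
    objs = []
    for i in range(min(top_k, len(scores))):
        if scores[i] < score_threshold:
            continue
        ymin, xmin, ymax, xmax = boxes[i]
        objs.append(Object(id=int(class_ids[i]), score=float(scores[i]),
                           bbox=BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)))
    return objs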
Example no. 6
def main():
    default_model_dir = "../coral/models"
    default_label_dir = "../coral/labels"
    default_model = "mobilenet_v2_1.0_224_quant_edgetpu.tflite"
    default_labels = "imagenet_labels.txt"
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        help=".tflite model path",
        default=os.path.join(default_model_dir, default_model),
    )
    parser.add_argument(
        "--labels",
        help="label file path",
        default=os.path.join(default_label_dir, default_labels),
    )
    args = parser.parse_args()

    with open(args.labels, "r") as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    with picamera.PiCamera() as camera:
        camera.resolution = (640, 480)
        camera.framerate = 30
        camera.annotate_text_size = 20
        width, height, channels = common.input_image_size(interpreter)
        camera.start_preview()
        try:
            stream = io.BytesIO()
            fps = deque(maxlen=20)
            fps.append(time.time())
            for foo in camera.capture_continuous(stream,
                                                 format="rgb",
                                                 use_video_port=True,
                                                 resize=(width, height)):
                stream.truncate()
                stream.seek(0)
                input = np.frombuffer(stream.getvalue(), dtype=np.uint8)
                start_ms = time.time()
                common.input_tensor(interpreter)[:, :] = np.reshape(
                    input, common.input_image_size(interpreter))
                interpreter.invoke()
                results = get_output(interpreter, top_k=3, score_threshold=0)
                inference_ms = (time.time() - start_ms) * 1000.0
                fps.append(time.time())
                fps_ms = len(fps) / (fps[-1] - fps[0])
                camera.annotate_text = "Inference: {:5.2f}ms FPS: {:3.1f}".format(
                    inference_ms, fps_ms)
                for result in results:
                    camera.annotate_text += "\n{:.0f}% {}".format(
                        100 * result[1], labels[result[0]])
                print(camera.annotate_text)
                time.sleep(2)
        finally:
            camera.stop_preview()
Example no. 7
def main():
    default_model_dir = 'model'
    default_model = 'ssdlite_mobiledet_quant_postprocess_edgetpu.tflite'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='classifier score threshold')
    parser.add_argument('--videosrc',
                        help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt',
                        help='Input video format.',
                        default='raw',
                        choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 100 frames.
    fps_counter = common.avg_fps_counter(100)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(interpreter, args.threshold, args.top_k)
        end_time = time.monotonic()
        text_lines = [
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs,
                            text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
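common.avg_fps_counter is consumed with next(fps_counter) above, so it is presumably a generator yielding a moving-average frame rate. A minimal sketch under that assumption; not the original implementation.

import collections
import time

def avg_fps_counter(window_size):
    """Yield the average fps over the last `window_size` calls to next()."""
    window = collections.deque(maxlen=window_size)
    prev = time.monotonic()
    yield 0.0
    while True:
        curr = time.monotonic()
        window.append(curr - prev)
        prev = curr
        yield len(window) / sum(window)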
Example no. 8
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--videosrc', help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt', help='Input video format.',
                        default='raw',
                        choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _  = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        results = get_output(interpreter, args.top_k, args.threshold)
        end_time = time.monotonic()
        text_lines = [
            ' ',
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        for result in results:
            text_lines.append('score={:.2f}: {}'.format(result.score, labels.get(result.id, result.id)))
        print(' '.join(text_lines))
        return generate_svg(src_size, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
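This variant of generate_svg only receives the frame size and the text lines, so it presumably just renders a text overlay for the gstreamer pipeline. A minimal sketch; element sizes and styling are assumptions.

def generate_svg(src_size, text_lines):
    """Return an SVG string that overlays the given text lines on the frame."""
    width, height = src_size
    parts = ['<svg width="{}" height="{}" xmlns="http://www.w3.org/2000/svg">'
             .format(width, height)]
    for row, line in enumerate(text_lines, start=1):
        parts.append('<text x="10" y="{}" font-size="20" fill="white">{}</text>'
                     .format(22 * row, line))
    parts.append('</svg>')
    return ''.join(parts)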
Example no. 9
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=str, help='Index of which video source to use. ', default = 0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # imagezmq receiver
    image_hub = imagezmq.ImageHub(open_port='tcp://147.47.200.65:35556', REQ_REP=False) # REQ_REP=False: use PUB/SUB (non-block)

    #cap = cv2.VideoCapture(args.camera_idx)

    while True:
        # receive from zmq
        timestamp, frame = image_hub.recv_image()
        dt = datetime.fromtimestamp(timestamp)
        #frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        start = time.monotonic()
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        inference_time = time.monotonic() - start
        inference_time = 'Inference time: %.2f ms (%.2f fps)' % (inference_time * 1000, 1.0 / inference_time)

        cv2_im = append_objs_to_img(cv2_im, objs, labels, inference_time, dt)
        #cv2_im = cv2.resize(cv2_im, (720, 720))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
        cv2.imshow("frame", cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
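The receiver above subscribes with REQ_REP=False, so the camera side must publish frames using imagezmq's PUB/SUB mode. A minimal sketch of that sender, assuming it runs on the host at 147.47.200.65 and binds the same port; only the framing (timestamp message plus frame) comes from the snippet.

import time
import cv2
import imagezmq

# PUB/SUB mode: the sender binds, the ImageHub above connects.
sender = imagezmq.ImageSender(connect_to='tcp://*:35556', REQ_REP=False)
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    sender.send_image(time.time(), frame)  # timestamp is read back by recv_image()
cap.release()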
Example no. 10
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_v2_1.0_224_quant_edgetpu.tflite'
    default_labels = 'imagenet_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    pygame.init()
    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (640, 480))
    width, height, channels = common.input_image_size(interpreter)
    camera.start()
    try:
        fps = deque(maxlen=20)
        fps.append(time.time())
        while True:
            imagen = camera.get_image()
            imagen = pygame.transform.scale(imagen, (width, height))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_ms = time.time()
            common.input_tensor(interpreter)[:, :] = np.reshape(
                input, (common.input_image_size(interpreter)))
            interpreter.invoke()
            results = get_output(interpreter, top_k=3, score_threshold=0)
            inference_ms = (time.time() - start_ms) * 1000.0
            fps.append(time.time())
            fps_ms = len(fps) / (fps[-1] - fps[0])
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(
                inference_ms, fps_ms)
            for result in results:
                annotate_text += '\n{:.0f}% {}'.format(100 * result[1],
                                                       labels[result[0]])
            print(annotate_text)
    finally:
        camera.stop()
Example no. 11
def detect_object(args):
    global outputFrame, lock

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    if args.videosrc == 'dev':
        cap = cv2.VideoCapture(args.camera_idx)
    else:
        if args.netsrc is None:
            print("--videosrc was set to net but --netsrc was not specified")
            sys.exit()
        cap = cv2.VideoCapture(args.netsrc)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        snapshot_im = pil_im
        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        if args.displayBool == 'True':
            cv2.imshow('frame', cv2_im)

        # acquire the lock, set the output frame, and release the
        # lock
        with lock:
            outputFrame = cv2_im.copy()


        if (time.time() - last_save) >=1:
            take_snapshot(snapshot_im, objs, labels, exclude=args.exclude.split(','), include=args.include.split(','))
        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
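outputFrame and lock follow the common Flask MJPEG-streaming pattern, where a route re-encodes the most recent frame. A minimal sketch of that consumer; the app and route names are assumptions, only outputFrame and lock come from the snippet above.

import threading
import cv2
from flask import Flask, Response

app = Flask(__name__)
outputFrame = None
lock = threading.Lock()

def generate():
    # Re-encode the latest frame as JPEG and yield it as an MJPEG part.
    global outputFrame, lock
    while True:
        with lock:
            if outputFrame is None:
                continue
            flag, encoded = cv2.imencode('.jpg', outputFrame)
            if not flag:
                continue
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + bytearray(encoded) + b'\r\n')

@app.route('/video_feed')
def video_feed():
    return Response(generate(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')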
Example no. 12
def serve():
    default_model_dir = '/media/mendel/detection-server/models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default=1)
    parser.add_argument('--threshold', type=float, help='Detector threshold. ', default=0.7)
    parser.add_argument('--display', dest='display', action='store_true', help='Display object data. ')
    parser.set_defaults(display=False)
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(os.path.join(default_model_dir, args.model))
    interpreter.allocate_tensors()
    labels = common.load_labels(os.path.join(default_model_dir, args.labels))

    # Get native camera resolution.
    cap = cv2.VideoCapture(args.camera_idx)
    camera_res = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Start a thread to detect objects in camera frames.
        future = executor.submit(start_detector, args.camera_idx, interpreter,
            args.threshold, labels, camera_res, args.display)

        # Start other threads for the grpc server.
        server = grpc.server(executor)
        detection_server_pb2_grpc.add_DetectionServerServicer_to_server(
            DetectionServerServicer(camera_res), server)
        server.add_insecure_port('[::]:50051')
        server.start()

        # Show the value returned by the executor.submit call.
        # This will wait forever unless a runtime error is encountered.
        future.result()

        server.stop(None)
Example no. 13
    def __init__(self, configuration):
        self.log_directory = configuration['log_directory']
        self.model_directory = configuration['model_directory']
        self.model = os.path.join(self.model_directory, configuration['model'])
        self.labels_file = os.path.join(self.model_directory,
                                        configuration['labels'])
        self.top_k = int(configuration['top_k'])
        self.camera_id = int(configuration['camera_id'])
        self.score_threshold = float(configuration['score_threshold'])
        self.clip_duration_sec = int(configuration['clip_duration_sec'])
        self.expire_time = self.clip_duration_sec
        self.video_directory = configuration['video_directory']
        self.camera_stream_url = configuration['camera_stream_url']

        print('Loading {} with {} labels.'.format(self.model,
                                                  self.labels_file))

        self.interpreter = common.make_interpreter(self.model)
        self.interpreter.allocate_tensors()
        self.stream = cv2.VideoCapture(self.camera_stream_url)
Example no. 14
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter  = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
      nonlocal fps_counter
      start_time = time.monotonic()
      common.set_input(interpreter, input_tensor)
      interpreter.invoke()
      # For larger input image sizes, use the edgetpu.classification.engine for better performance
      objs = get_output(interpreter, args.threshold, args.top_k)
      end_time = time.monotonic()
      text_lines = [
          'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
          'FPS: {} fps'.format(round(next(fps_counter))),
      ]
      print(' '.join(text_lines))
      return generate_svg(src_size, inference_size, inference_box, objs, labels, text_lines)

    result = gstreamer.run_pipeline(user_callback, appsink_size=inference_size)
Example no. 15
def classify(model_type=ModelType.General, top_k=1):
    interpreter = common.make_interpreter(model_type.model_path())
    interpreter.allocate_tensors()
    labels = load_labels(model_type.label_path())
    cap = cv2.VideoCapture(0)
    
    if cap.isOpened():
        for i in range(0,15):
            ret, frame = cap.read()
            time.sleep(1/1000)
            if not ret:
                break
        cv2_im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)
        common.set_input(interpreter, pil_im)
        
        results = classify_image(interpreter, pil_im, top_k)
        
        for label_id, prob in results:
            cv2.putText(cv2_im_rgb, labels[label_id], (5,35), cv2.FONT_HERSHEY_SIMPLEX, .7, (0,0,0), 2)
            print('%s: %.5f' % (labels[label_id], prob))
        
        cv2.imshow('Classification', cv2_im_rgb)
        cv2.waitKey(50)
        
        def make(obj):
            fs = "{0}({1})"
            parsed = parse.parse(fs, labels[obj[0]])
            if parsed is not None and len(parsed.fixed) > 1:
                tLabel = parsed[1]
            else:
                tLabel = labels[obj[0]]
            return Result(
                label = tLabel,
                percent = int(100 * obj[1])
                )
        cap.release()
        return [make(obj) for obj in results]
Example no. 16
def main():
    cam_w, cam_h = 640, 480
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=5,
                        help='number of categories with highest score to display')
    parser.add_argument('--threshold', type=float, default=0.5,
                        help='classifier score threshold')
    args = parser.parse_args()

    with open(args.labels, 'r') as f:
        pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
        labels = dict((int(k), v) for k, v in pairs)

    print('Loading {} with {} labels.'.format(args.model, args.labels))

    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    pygame.init()
    pygame.font.init()
    font = pygame.font.SysFont('Arial', 20)

    pygame.camera.init()
    camlist = pygame.camera.list_cameras()

    w, h, _ = common.input_image_size(interpreter)

    print('By default using camera: ', camlist[-1])
    camera = pygame.camera.Camera(camlist[-1], (cam_w, cam_h))
    try:
      display = pygame.display.set_mode((cam_w, cam_h), 0)
    except pygame.error as e:
      sys.stderr.write("\nERROR: Unable to open a display window. Make sure a monitor is attached and that "
            "the DISPLAY environment variable is set. Example: \n"
            ">export DISPLAY=\":0\" \n")
      raise e

    red = pygame.Color(255, 0, 0)

    camera.start()
    try:
        last_time = time.monotonic()
        while True:
            mysurface = camera.get_image()
            imagen = pygame.transform.scale(mysurface, (w, h))
            input = np.frombuffer(imagen.get_buffer(), dtype=np.uint8)
            start_time = time.monotonic()
            common.input_tensor(interpreter)[:,:] = np.reshape(input, (common.input_image_size(interpreter)))
            interpreter.invoke()
            results = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
            stop_time = time.monotonic()
            inference_ms = (stop_time - start_time)*1000.0
            fps_ms = 1.0 / (stop_time - last_time)
            last_time = stop_time
            annotate_text = 'Inference: {:5.2f}ms FPS: {:3.1f}'.format(inference_ms, fps_ms)
            for result in results:
               x0, y0, x1, y1 = list(result.bbox)
               rect = pygame.Rect(x0 * cam_w, y0 * cam_h, (x1 - x0) * cam_w, (y1 - y0) * cam_h)
               pygame.draw.rect(mysurface, red, rect, 1)
               label = '{:.0f}% {}'.format(100*result.score, labels.get(result.id, result.id))
               text = font.render(label, True, red)
               print(label, ' ', end='')
               mysurface.blit(text, (x0 * cam_w , y0 * cam_h))
            text = font.render(annotate_text, True, red)
            print(annotate_text)
            mysurface.blit(text, (0, 0))
            display.blit(mysurface, (0, 0))
            pygame.display.flip()
    finally:
        camera.stop()
Example no. 17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--camera_idx',
                        type=str,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument(
        '--model',
        type=str,
        help='Pose model to use. ',
        default=
        'models/posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
    parser.add_argument('--pose3d',
                        type=str,
                        help='3D Pose model to use. ',
                        default='models/3dpose_gan_edgetpu.tflite')
    parser.add_argument('--dataset',
                        type=str,
                        help='Type of dataset. ',
                        default="CORAL")
    parser.add_argument('--rot',
                        type=int,
                        help='Number of degree to rotate in 3D pose. ',
                        default=90)
    args = parser.parse_args()

    engine = PoseEngine(args.model)
    _, image_height, image_width, _ = engine.get_input_tensor_shape()
    interpreter_3dpose = None
    if len(args.pose3d) > 0:
        interpreter_3dpose = common.make_interpreter(args.pose3d)
        interpreter_3dpose.allocate_tensors()
    print("Load all models done!")

    cap = cv2.VideoCapture(args.camera_idx)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(cv2_im_rgb)
        # Image.resize returns a new image, so keep the result.
        pil_image = pil_image.resize((image_width, image_height), Image.NEAREST)

        start_time = time.monotonic()
        poses, inference_time = engine.DetectPosesInImage(np.uint8(pil_image))
        print('2D Pose Inference time: %.fms (%.2f fps)' %
              (inference_time, 1000 / inference_time))

        cv2_im = draw_skel_and_kp(cv2_im, poses, args.rot, interpreter_3dpose)

        end_time = time.monotonic()
        process_time = 1000 * (end_time - start_time)
        print('3D Pose End-to-End Inference time: %.fms (%.2f fps)' %
              (process_time, 1000 / process_time))

        cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
        cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN)
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
Example no. 18
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'fer_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=1,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--videosrc',
                        help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt',
                        help='Input video format.',
                        default='raw',
                        choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    face_interpreter = common.make_interpreter(
        os.path.join(default_model_dir, default_model))
    face_interpreter.allocate_tensors()
    # fer interpreter
    fer_interpreter = common.make_interpreter(args.model)
    fer_interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(face_interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(face_interpreter, input_tensor)
        face_interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(face_interpreter, args.threshold, args.top_k)
        # Get face detected part
        from PIL import Image
        im = Image.fromarray(common.input_tensor(face_interpreter))
        src_w, src_h = src_size
        inf_w, inf_h = inference_size
        results = []
        emo_objs = []
        for obj in objs:
            x0, y0, x1, y1 = list(obj.bbox)
            # Relative coordinates.
            x, y, w, h = x0, y0, x1 - x0, y1 - y0
            # Absolute coordinates, input tensor space.
            x, y, w, h = int(x * inf_w), int(y * inf_h), int(w * inf_w), int(
                h * inf_h)
            crop_rectangle = (x, y, x + w, y + h)
            # get face
            face = im.crop(crop_rectangle)
            face = np.array(face)
            # convert to grayscale
            #face = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY)
            print(face.shape)
            face = cv2.resize(face, (224, 224))
            face = face.astype(np.uint8)
            #face /= float(face.max())
            face = np.reshape(face.flatten(), (224, 224, 3))
            # invoke fer interpreter
            common.set_input2(fer_interpreter, face)
            fer_interpreter.invoke()
            # process results
            results = get_emotion(fer_interpreter)
            if len(results) > 0:
                setattr(obj, "id", results[0].id)
                setattr(obj, "score", results[0].score)
                emo_objs.append(obj)
        objs = emo_objs
        end_time = time.monotonic()

        text_lines = []
        if len(objs) > 0:
            text_lines = [
                'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
                'FPS: {} fps'.format(round(next(fps_counter))),
            ]
            for result in results:
                text_lines.append('score={:.2f}: {}'.format(
                    result.score, labels.get(result.id, result.id)))
            #print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs,
                            labels, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
Example no. 19
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_face_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open('face_output.csv', 'w')
    with f:
        fnames = [
            'timestamp', 'idx', 'label', 'width', 'height', 'xmin', 'ymin',
            'xmax', 'ymax', 'score'
        ]
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        for image_path in sorted(
                glob.glob('/home/mendel/dataset/Store/frames/Camera01/*.jpg')):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter,
                              score_threshold=args.threshold,
                              top_k=args.top_k)
            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                    x1 * width), int(y1 * height)
                score = obj.score
                label = 'face'
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })
Example no. 20
    'bitrate': 1000000,
    'source': '/dev/video2:YUY2:640x480:30/1',
    'window': 10,
    'top_k': 3,
    'threshold': 0.1,
    'print': False,
    'camera_idx': 2,
    'labels': os.path.join(default_model_dir, default_labels)
})

#Start up camera
cap = open_available_stream()

#Load up model, labels, and interpreter
print('Loading {} with {} labels.'.format(args.model, args.labels))
interpreter = common.make_interpreter(args.model)
interpreter.allocate_tensors()
labels = load_labels(args.labels)

#Start networktables
NetworkTables.initialize()
sd = NetworkTables.getTable("SmartDashboard2")


def main():

    #Run our flask app
    app.run(host='0.0.0.0', port=5000, use_reloader=False)

    #Cleaning up
    cap.release()
Example no. 21
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument(
        '--top_k',
        type=int,
        default=5,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='classifier score threshold')
    parser.add_argument('--video_width',
                        type=int,
                        help='Width resolution of the Video Capture',
                        default=1920)
    parser.add_argument('--video_height',
                        type=int,
                        help='Height resolution of the Video Capture',
                        default=1080)
    parser.add_argument(
        '--confirmations',
        type=int,
        help=
        'Frames detected with one or more person(s) needed before sending out an alert',
        default=30)
    parser.add_argument(
        '--time_period',
        type=int,
        help='Maximum time for confirmation check (in seconds)',
        default=10)
    parser.add_argument('--alert_cooldown',
                        type=int,
                        help='Cooldown time between alerts (in seconds)',
                        default=600)
    args = parser.parse_args()

    print('Loading {}.'.format(args.model))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()

    cap = cv2.VideoCapture(args.camera_idx)

    # set VideoCapture resolution
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.video_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.video_height)

    cooldown_until = 0
    confirmations = array.array('d', [0] * args.confirmations)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        persons = list(filter(lambda x: x.id == PERSON_COCO_INDEX, objs))
        if persons:
            current_time = time.time()
            confirmations.append(current_time)
            if confirmations[-1] - confirmations.pop(
                    0
            ) <= args.time_period and confirmations[-1] >= cooldown_until:
                print("alerted at", current_time)
                image_link = upload_image(pil_im)
                send_alert("person detected at {}. \n{}".format(
                    datetime.datetime.now().isoformat(), image_link))
                cooldown_until = current_time + args.alert_cooldown
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
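upload_image and send_alert are not shown in this snippet. A minimal sketch of a send_alert helper using plain SMTP; the host and addresses are placeholders, and the real implementation may use a completely different channel.

import smtplib
from email.message import EmailMessage

def send_alert(body, smtp_host='localhost',
               sender='coral@example.com', recipient='you@example.com'):
    """Send the alert text as an e-mail via a local SMTP relay (assumed)."""
    msg = EmailMessage()
    msg['Subject'] = 'Person detected'
    msg['From'] = sender
    msg['To'] = recipient
    msg.set_content(body)
    with smtplib.SMTP(smtp_host) as server:
        server.send_message(msg)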
Example no. 22
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(0)
    # For sending frames to the socket server.
    #cap.set(3, 320)
    #cap.set(4, 240)

    img_counter = 0

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 90]

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        # Send the encoded frame to the server.
        result, frame = cv2.imencode('.jpg', cv2_im, encode_param)
        data = pickle.dumps(frame, 0)
        size = len(data)
        clientsocket.sendall(struct.pack(">L", size) + data)
        #connection.sendall(struct.pack(">L", size) + data)
        img_counter += 1


        #cv2.imshow('frame', cv2_im)
        #if cv2.waitKey(1) & 0xFF == ord('q'):
        #    break

    cap.release()
    cv2.destroyAllWindows()
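The loop above streams pickled JPEG frames to an undefined clientsocket with a ">L" length prefix. A minimal sketch of the matching receiver; the port is a placeholder, only the framing comes from the snippet.

import pickle
import socket
import struct
import cv2

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('0.0.0.0', 8485))  # placeholder port
server.listen(1)
conn, _ = server.accept()

data = b''
payload_size = struct.calcsize('>L')
while True:
    while len(data) < payload_size:
        data += conn.recv(4096)
    msg_size = struct.unpack('>L', data[:payload_size])[0]
    data = data[payload_size:]
    while len(data) < msg_size:
        data += conn.recv(4096)
    frame_data, data = data[:msg_size], data[msg_size:]
    frame = cv2.imdecode(pickle.loads(frame_data), cv2.IMREAD_COLOR)
    cv2.imshow('received', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break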
Example no. 23
def main():
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=10,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.3,
                        help='classifier score threshold')
    parser.add_argument('--class_ids',
                        nargs='*',
                        type=int,
                        default=0,
                        help='Array of class id')
    parser.add_argument('--input_files',
                        default='/home/mendel/dataset/*.jpg',
                        help='Input files')
    parser.add_argument('--csv_out',
                        default='detect_output.csv',
                        help='csv output file')
    args = parser.parse_args()
    if args.class_ids == 0:
        args.class_ids = [0]

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    # csv writer
    f = open(args.csv_out, 'w')
    with f:
        fnames = [
            'timestamp', 'idx', 'label', 'width', 'height', 'xmin', 'ymin',
            'xmax', 'ymax', 'score'
        ]
        writer = csv.DictWriter(f, fieldnames=fnames)
        writer.writeheader()

        # read frames
        inference_time = []
        for image_path in sorted(glob.glob(args.input_files)):
            image_name = os.path.splitext(os.path.basename(image_path))[0]
            #print(image_name)
            pil_im = Image.open(image_path)

            # inference
            start = time.time()
            common.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = get_output(interpreter,
                              score_threshold=args.threshold,
                              top_k=args.top_k,
                              class_list=args.class_ids)
            inference_time.append(time.time() - start)

            # return results
            (width, height) = pil_im.size
            idx = -1
            for obj in objs:
                x0, y0, x1, y1 = list(obj.bbox)
                x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                    x1 * width), int(y1 * height)
                score = obj.score
                label = labels.get(obj.id, obj.id)
                idx += 1
                writer.writerow({
                    'timestamp': image_name,
                    'idx': idx,
                    'label': label,
                    'width': width,
                    'height': height,
                    'xmin': x0,
                    'ymin': y0,
                    'xmax': x1,
                    'ymax': y1,
                    'score': score
                })

        print("Inference time : {:.3f} ms".format(
            sum(inference_time) * 1000 / len(inference_time)))
        print("Frames per second : {:.2f} fps".format(
            len(inference_time) / sum(inference_time)))
Example no. 24
def main():
    # default_model_dir = '../all_models'
    # default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    # default_labels = 'coco_labels.txt'

    default_model_dir = '../cmpe297_model'
    # default_model = 'ssdlite_6C_SB_10K_mobiledet_screws.tflite'   #5 classes small BB
    # default_model = 'ssdlite_6C_SB_10K_mobiledet_screws_edgetpu.tflite' #5 classes small BB
    # default_model = 'ssdlite_6C_SB_25K_mobiledet_screws.tflite' #5 classes small BB
    default_model = 'ssdlite_6C_SB_25K_mobiledet_screws_edgetpu.tflite'  #5 classes small BB
    # default_model = 'ssdlite_6C_BB_10K_mobiledet_screws.tflite'  #5 classes big BB 1K
    # default_model = 'ssdlite_6C_BB_10K_mobiledet_screws_edgetpu.tflite'  #5 classes big BB 1K
    default_labels = 'ssdlite_mobiledet_screws_6c_labels.txt'

    # default_model = 'ssdlite_2C_BB_10K_mobiledet_screws.tflite'  #5 classes big BB 1K
    # default_model = 'ssdlite_2C_BB_10K_mobiledet_screws_edgetpu.tflite'  #5 classes big BB 1K
    # default_labels = 'ssdlite_mobiledet_screws_2c_labels.txt'

    # default_model_dir = '../cmpe297_model'
    # default_model = 'Sergio_v3_ssdlite_mobiledet_dog_vs_cat.tflite'
    # # default_model = 'Sergio_v3_sdlite_mobiledet_dog_vs_cat_edgetpu.tflite'
    # default_labels = 'cat_vs_doc_All.txt'

    # default_model = 'mobilenet_v2_1.0_224_quant_edgetpu_cmpe297.tflite'
    # # default_model = 'mobilenet_v2_1.0_224_quant_cmpe297.tflite'
    # default_labels = 'flower_labels_cmpe297.txt'

    # default_model = 'eager_mobilenet_v2_1.0_224_quant.tflite'  #no edgeTPU
    # default_model = 'eager_mobilenet_v2_1.0_224_quant_edgetpu.tflite'  #eager
    #
    # default_model = 'eager2_mobilenet_v2_1.0_224_quant.tflite'  #eager
    # default_model = 'eager2_mobilenet_v2_1.0_224_quant_edgetpu.tflite'  #eager
    # default_labels = 'duckylabels.txt'

    # default_model = 'quant_coco-tiny-v3-relu.tflite'
    # default_model = 'quant_coco-tiny-v3-relu_edgetpu.tflite'

    # default_model = 'ssdlite_mobiledet_dog_vs_cat_edgetpu.tflite'
    # default_labels = 'cat_vs_doc.txt'

    # default_model = 'cmpe297_ssdlite_mobiledet_dog.tflite'
    # default_model = 'cmpe297_ssdlite_mobiledet_dog_edgetpu.tflite'
    # default_model = 'cmpe297v2_ssdlite_mobiledet_dog_edgetpu.tflite'
    # default_labels = 'dogs_labels.txt'

    # default_model = 'ssdlite_mobiledet_dog_vs_cat_edgetpuAcha.tflite'
    # default_labels = 'cat_vs_doc_All.txt'

    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    parser.add_argument('--videosrc',
                        help='Which video source to use. ',
                        default='/dev/video0')
    parser.add_argument('--videofmt',
                        help='Input video format.',
                        default='raw',
                        choices=['raw', 'h264', 'jpeg'])
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    w, h, _ = common.input_image_size(interpreter)
    inference_size = (w, h)
    # Average fps over last 30 frames.
    fps_counter = common.avg_fps_counter(30)

    def user_callback(input_tensor, src_size, inference_box):
        nonlocal fps_counter
        start_time = time.monotonic()
        common.set_input(interpreter, input_tensor)
        interpreter.invoke()
        # For larger input image sizes, use the edgetpu.classification.engine for better performance
        objs = get_output(interpreter, args.threshold, args.top_k)
        #   print(objs[0].bbox)
        end_time = time.monotonic()
        text_lines = [
            'Inference: {:.2f} ms'.format((end_time - start_time) * 1000),
            'FPS: {} fps'.format(round(next(fps_counter))),
        ]
        print(' '.join(text_lines))
        return generate_svg(src_size, inference_size, inference_box, objs,
                            labels, text_lines)

    result = gstreamer.run_pipeline(user_callback,
                                    src_size=(640, 480),
                                    appsink_size=inference_size,
                                    videosrc=args.videosrc,
                                    videofmt=args.videofmt)
Example no. 25
def main():
    default_model_dir = 'all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use. ',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()
    multiTracker = cv2.MultiTracker_create()

    #Initialize logging files
    logging.basicConfig(filename='storage/results.log',
                        format='%(asctime)s-%(message)s',
                        level=logging.DEBUG)

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    vs = PiVideoStream(resolution=(2048, 1536), framerate=32).start()
    #cap = cv2.VideoCapture(args.camera_idx)
    cap = vs.stream
    #cap.set(3, 1920)
    #cap.set(4, 1440)
    # 4:3 resolutions
    # 640×480, 800×600, 960×720, 1024×768, 1280×960, 1400×1050,
    # 1440×1080 , 1600×1200, 1856×1392, 1920×1440, 2048×1536
    # 5 MP
    #cap.set(3, 2048)
    #cap.set(4, 1536)

    bboxes = []
    colors = []
    visitation = []
    trackers = []
    started_tracking = None
    last_tracked = None
    visitation_id = None
    save_one_with_boxes = False
    recording = False
    out = None
    fps = FPS().start()
    is_stopped = False
    current_fps = 4.0

    while vs is not None:
        try:
            frame = vs.read()

            if frame is not None:
                if fps._numFrames < 500:
                    fps.update()
                else:
                    fps.stop()
                    current_fps = fps.fps()
                    logging.info("[INFO] elasped time: {:.2f}".format(
                        fps.elapsed()))
                    logging.info("[INFO] approx. FPS: {:.2f}".format(
                        fps.fps()))
                    fps = FPS().start()

                success, boxes = multiTracker.update(frame)

                if success:
                    last_tracked = time.time()

                if len(boxes) > 0:
                    logging.info("success {}".format(success))
                    logging.info("boxes {}".format(boxes))

                cv2_im = frame
                #cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
                pil_im = Image.fromarray(cv2_im)

                common.set_input(interpreter, pil_im)
                interpreter.invoke()
                objs = get_output(interpreter,
                                  score_threshold=args.threshold,
                                  top_k=args.top_k)
                height, width, channels = cv2_im.shape

                bird_detected = False
                boxes_to_draw = []
                for obj in objs:
                    x0, y0, x1, y1 = list(obj.bbox)
                    x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                        x1 * width), int(y1 * height)
                    percent = int(100 * obj.score)
                    object_label = labels.get(obj.id, obj.id)
                    label = '{}% {}'.format(percent, object_label)
                    hdd = psutil.disk_usage('/')

                    if object_label == 'bird' and percent > 40:
                        bird_detected = True
                        new_bird = True

                        for bbox in boxes:
                            if intersects(bbox, obj.bbox):
                                logging.info("intersected.. same bird")
                                new_bird = False

                        if new_bird and len(bboxes) == 0:
                            logging.info("found a new bird")
                            visitation_id = uuid.uuid4()
                            started_tracking = time.time()
                            recording = True
                            save_one_with_boxes = True
                            bboxes.append(obj.bbox)
                            colors.append((randint(64, 255), randint(64, 255),
                                           randint(64, 255)))
                            tracker = cv2.TrackerCSRT_create()
                            trackers.append(tracker)
                            multiTracker.add(tracker, cv2_im, obj.bbox)

                        if hdd.percent < 95:
                            boxed_image_path = "storage/detected/boxed_{}_{}_{}.png".format(
                                time.strftime("%Y-%m-%d_%H-%M-%S"), percent,
                                visitation_id)
                            full_image_path = "storage/detected/full_{}_{}_{}.png".format(
                                time.strftime("%Y-%m-%d_%H-%M-%S"), percent,
                                visitation_id)
                            cv2.imwrite(boxed_image_path, cv2_im[y0:y1, x0:x1])
                            if percent > 95:
                                cv2.imwrite(full_image_path, cv2_im)

                        else:
                            logging.info("Not enough disk space")

                    percent = int(100 * obj.score)
                    object_label = labels.get(obj.id, obj.id)
                    label = '{}% {}'.format(percent, object_label)

                    # postpone drawing so we don't get lines in the photos
                    boxes_to_draw.append({
                        "p1": (x0, y0),
                        "p2": (x1, y1),
                        "label": label,
                        "label_p": (x0, y0 + 30)
                    })

                for box in boxes_to_draw:
                    cv2_im = cv2.rectangle(cv2_im, box["p1"], box["p2"],
                                           (0, 255, 0), 2)
                    cv2_im = cv2.putText(cv2_im, box["label"], box["label_p"],
                                         cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                         (255, 0, 0), 2)

                if recording:
                    if out is None:
                        fourcc = cv2.VideoWriter_fourcc(*'X264')
                        out = cv2.VideoWriter(
                            "storage/video/{}.avi".format(visitation_id),
                            fourcc, 4.0, (2048, 1536))
                    out.write(cv2_im)

                if bird_detected and save_one_with_boxes:
                    with_boxes_image_path = "storage/with_boxes/full_{}_{}.png".format(
                        time.strftime("%Y-%m-%d_%H-%M-%S"), visitation_id)
                    cv2.imwrite(with_boxes_image_path, cv2_im)
                    save_one_with_boxes = False

                if not bird_detected and trackers:
                    now = time.time()
                    if now - last_tracked > 60:
                        logging.info("visitation {} lasted {} seconds".format(
                            visitation_id, now - started_tracking))
                        logging.info("clearing trackers")
                        for tracker in trackers:
                            tracker.clear()
                        multiTracker = cv2.MultiTracker_create()
                        boxes = []
                        colors = []
                        trackers = []
                        bboxes = []
                        recording = False
                        out.release()
                        out = None

                for i, newbox in enumerate(boxes):
                    x0, y0, x1, y1 = list(newbox)
                    x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                        x1 * width), int(y1 * height)
                    cv2_im = cv2.rectangle(cv2_im, (x0, y0), (x1, y1),
                                           (0, 0, 255), 2)

                cv2.namedWindow('Leroy', cv2.WINDOW_NORMAL)
                cv2.resizeWindow('Leroy', 800, 600)
                cv2.imshow('Leroy', cv2_im)

        except KeyboardInterrupt:
            print('Interrupted')
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)
        except Exception:
            logging.exception('Something happened.')
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    if out is not None:
        out.release()
    vs.stop()
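# The bird-tracking loop above decides whether a detection is the "same bird" via an
# intersects(bbox, obj.bbox) helper that is not shown in this example. A minimal
# sketch of such an overlap test, assuming both boxes are (x0, y0, x1, y1) values in
# the same coordinate space:
def intersects(box_a, box_b):
    """Return True if two (x0, y0, x1, y1) boxes overlap at all."""
    ax0, ay0, ax1, ay1 = box_a
    bx0, by0, bx1, by1 = box_b
    # No overlap if one box lies entirely to the left/right of, or above/below, the other.
    if ax1 < bx0 or bx1 < ax0:
        return False
    if ay1 < by0 or by1 < ay0:
        return False
    return True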
Example no. 26
def main():
    default_model_dir = "../coral/models"
    default_label_dir = "../coral/labels"
    default_model = "ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite"
    default_labels = "coco_labels.txt"

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        help=".tflite model path",
        default=os.path.join(default_model_dir, default_model),
    )
    parser.add_argument(
        "--labels",
        help="label file path",
        default=os.path.join(default_label_dir, default_labels),
    )
    parser.add_argument(
        "--top_k",
        type=int,
        default=3,
        help="number of categories with highest score to display",
    )
    parser.add_argument("--camera_idx",
                        type=int,
                        help="Index of which video source to use. ",
                        default=0)
    parser.add_argument("--threshold",
                        type=float,
                        default=0.1,
                        help="classifier score threshold")
    args = parser.parse_args()

    print("Loading {} with {} labels.".format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(args.camera_idx)

    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        count += 1
        if not ret:
            break
        if count == 20:
            break
        cv2_im = frame

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        # cv2.imshow("frame", cv2_im)
        # if cv2.waitKey(1) & 0xFF == ord("q"):
        #     break

        # Saving the image
        cv2.imwrite("detectImage.jpg", cv2_im)

    cap.release()
    cv2.destroyAllWindows()
Example no. 27
def main():
    args = get_arguments()
    MODEL = args.model
    LABELS = args.labels
    TOP_K = args.top_k
    THRESHOLD = args.threshold
    CAPTURE_MODE = args.capture_mode
    INPUT_VIDEO_DIR = args.input_video_dir
    INPUT_VIDEO = str(os.path.join(INPUT_VIDEO_DIR, args.input_video))
    OUTPUT_VIDEO_DIR = args.output_video_dir
    OUTPUT_VIDEO = str(os.path.join(OUTPUT_VIDEO_DIR, args.output_video))
    OUTPUT_VIDEO_RES = args.output_video_res
    OUTPUT_VIDEO_FPS = args.output_video_fps
    OUTPUT_VIDEO_TIME = args.output_video_time
    
    RECORD_MODE = args.record_mode
    MONITOR = args.monitor
    DEBUG_MODE = args.debug_mode
    
    # Define the hardware
    ui = get_ui()
    
    # Create an interpreter engine based on the tflite model path given
    debug_print("Loading {} with {} labels.".format(MODEL, LABELS))
    interpreter = common.make_interpreter(MODEL)
    interpreter.allocate_tensors()

    cap = None
    out = None

    # Define camera or input video properties
    cap = load_cap(CAPTURE_MODE, INPUT_VIDEO)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    cap_width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    
    # Define output file if recording
    out = load_out(cap, CAPTURE_MODE, OUTPUT_VIDEO, OUTPUT_VIDEO_RES, cap_fps, OUTPUT_VIDEO_FPS)

    labels = load_labels(LABELS)
    frame_count = 0

    # Read each frame from the capture and feed it to the interpreter. Produce an edited frame with the detected objects enclosed in bounding boxes.
    # Based on the object(s) detected, relay a Bluetooth message to the Arduino that controls the stepper motor and animatronics.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        objs = []
        cv2_im, objs = detect(frame, interpreter, labels, THRESHOLD, TOP_K)
        
        display_frame(cv2_im)

        relay_bluetooth_detection(ui, objs, labels, cap_width, cap_height)
        
        if RECORD_MODE:
            out.write(cv2_im)
        
        # Track frame_count/seconds whenever the stop condition below needs them,
        # otherwise `seconds` would be undefined in record-only mode.
        if RECORD_MODE or DEBUG_MODE:
            frame_count += 1
            seconds = (frame_count / cap_fps) % 60
            if DEBUG_MODE:
                debug_print(seconds)
                                   
        if (cv2.waitKey(1) & 0xFF == ord('q')) or ((RECORD_MODE or DEBUG_MODE) and seconds > OUTPUT_VIDEO_TIME):
            debug_print("Ending capture")
            break
    
    cleanup(cap, out)

    print("Successful termination")
Example no. 28
def main():
    default_model_dir = './'  #'../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        help='.tflite model path',
                        default=os.path.join(default_model_dir, default_model))
    parser.add_argument('--labels',
                        help='label file path',
                        default=os.path.join(default_model_dir,
                                             default_labels))
    parser.add_argument(
        '--top_k',
        type=int,
        default=3,
        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx',
                        type=int,
                        help='Index of which video source to use.',
                        default=0)
    parser.add_argument('--threshold',
                        type=float,
                        default=0.5,
                        help='classifier score threshold')
    parser.add_argument('--min_angle',
                        type=float,
                        default=0.0,
                        help='minimum angle for sweep')
    parser.add_argument('--max_angle',
                        type=float,
                        default=180.0,
                        help='maximum angle for sweep')

    args = parser.parse_args()

    print('Initializing servo')
    servo, servoData = ServoControls.init(
        0,
        sweepStepTracking=0.1,
        minAngle=args.min_angle,
        maxAngle=args.max_angle)  #SERVO_PIN, 50)
    servoThread = _thread.start_new_thread(control_servo, (servo, servoData))

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    #    cv2.VideoCapture.set(v_c, cv2.CAP_PROP_FPS, 15)

    cap = cv2.VideoCapture(args.camera_idx)
    # Read first frame to get window frame shape
    _, frame = cap.read()
    if frame is None:
        raise Exception('Could not read a frame from the video source!')
    frameH, frameW, frameChannels = frame.shape

    lastTargetLost = None
    lastTargetLostTime = datetime.datetime.now()
    targetState = TargetState.UNKNOWN

    play_sound('searching.mp3')

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        #frame = cv2.flip(frame, flipCode=1)
        frame = imutils.rotate(frame, 90)
        h, w, layers = frame.shape
        aspect_ratio = w / h
        cv2_im = frame  # cv2.resize(frame, (1920, 1080))

        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter,
                          score_threshold=args.threshold,
                          top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)

        face = next(filter(lambda a: a.id == 0, objs), None)

        if face is not None:
            if targetState == TargetState.UNKNOWN:
                targetState = TargetState.ACQUIRED
            height, width, channels = cv2_im.shape

            x0, y0, x1, y1 = list(face.bbox)
            x0, y0, x1, y1 = int(x0 * width), int(y0 * height), int(
                x1 * width), int(y1 * height)

            lastTargetLost = 0
            servoData['targetCoordinates'] = [
                round(abs((x0 + (x1)) / 2)),
                round(abs((y0 + (y1)) / 2))
            ]
        else:
            # target may have been lost
            if targetState == TargetState.TRACKING:
                targetState = TargetState.LOST
                # track lost time
                lastTargetLostTime = datetime.datetime.now()
            if targetState == TargetState.LOST and (
                    lastTargetLostTime is None or
                    (datetime.datetime.now() - lastTargetLostTime).seconds > 2):
                # if lost for more than two seconds, reset targetState back to default
                servoData['targetCoordinates'] = [-1, -1]
                play_sound('are-still-there.mp3')
                targetState = TargetState.UNKNOWN
                lastTargetLostTime = None

        if targetState == TargetState.ACQUIRED:
            play_sound(TARGET_ACQUIRED)
            targetState = TargetState.TRACKING

        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    RUNNING = False
    cap.release()
    cv2.destroyAllWindows()
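# The detection loop above only publishes the bounding-box centre into
# servoData['targetCoordinates']; the sweep/track logic runs in control_servo(),
# which is not shown. A hypothetical sketch of such a loop: servo.set_angle(), the
# 'minAngle'/'maxAngle' keys, the frame width and the gain are illustrative
# assumptions, not the real ServoControls API.
import time

def control_servo(servo, servoData, frame_width=640, gain=5.0, interval=0.05):
    """Nudge the pan angle toward the target's horizontal centre; idle when no target."""
    angle = (servoData['minAngle'] + servoData['maxAngle']) / 2.0
    while True:
        x, _y = servoData['targetCoordinates']
        if x >= 0:  # [-1, -1] means no target is being tracked
            # Proportional correction toward the centre of the frame.
            error = (x / frame_width) - 0.5
            angle = max(servoData['minAngle'],
                        min(servoData['maxAngle'], angle - error * gain))
            servo.set_angle(angle)
        time.sleep(interval)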
Example no. 29
def main():
    args = get_arguments()
    MODEL = args.model
    LABELS = args.labels
    TOP_K = args.top_k
    THRESHOLD = args.threshold
    CAPTURE_MODE = args.capture_mode
    INPUT_VIDEO_DIR = args.input_video_dir
    INPUT_VIDEO = str(os.path.join(INPUT_VIDEO_DIR, args.input_video))
    OUTPUT_VIDEO_DIR = args.output_video_dir
    OUTPUT_VIDEO = str(os.path.join(OUTPUT_VIDEO_DIR, args.output_video))
    OUTPUT_VIDEO_RES = args.output_video_res
    OUTPUT_VIDEO_FPS = args.output_video_fps
    OUTPUT_VIDEO_TIME = args.output_video_time

    RECORD_MODE = args.record_mode
    MONITOR = args.monitor
    DEBUG_MODE = args.debug_mode

    ui = get_ui()

    debug_print("Loading {} with {} labels.".format(MODEL, LABELS))
    interpreter = common.make_interpreter(MODEL)
    interpreter.allocate_tensors()

    cap = None
    out = None

    cap = load_cap(CAPTURE_MODE, INPUT_VIDEO)
    cap_fps = cap.get(cv2.CAP_PROP_FPS)
    cap_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

    out = load_out(cap, CAPTURE_MODE, OUTPUT_VIDEO, OUTPUT_VIDEO_RES, cap_fps,
                   OUTPUT_VIDEO_FPS)

    labels = load_labels(LABELS)
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        objs = []
        cv2_im, objs = detect(frame, interpreter, labels, THRESHOLD, TOP_K)
        display_frame(cv2_im)

        relay_bluetooth_detection(ui, objs, cap_width, cap_height)

        if RECORD_MODE:
            out.write(cv2_im)
            frame_count += 1
            seconds = (frame_count / cap_fps) % 60
            debug_print(seconds)

        if (cv2.waitKey(1) & 0xFF
                == ord('q')) or (RECORD_MODE and seconds > OUTPUT_VIDEO_TIME):
            debug_print("Ending capture")
            break

    cleanup(cap, out)

    print("Successful termination")
Example no. 30
def main():
    # yawnStatus, yawns, COUNTER and ALARM_ON are assumed to be module-level state in
    # the original script; declare them global so the loop below can update them.
    global yawnStatus, yawns, COUNTER, ALARM_ON
    default_model_dir = '../all_models'
    default_model = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
    default_labels = 'coco_labels.txt'
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='.tflite model path',
                        default=os.path.join(default_model_dir,default_model))
    parser.add_argument('--labels', help='label file path',
                        default=os.path.join(default_model_dir, default_labels))
    parser.add_argument('--top_k', type=int, default=3,
                        help='number of categories with highest score to display')
    parser.add_argument('--camera_idx', type=int, help='Index of which video source to use. ', default = 0)
    parser.add_argument('--threshold', type=float, default=0.1,
                        help='classifier score threshold')
    args = parser.parse_args()

    print('Loading {} with {} labels.'.format(args.model, args.labels))
    interpreter = common.make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = load_labels(args.labels)

    cap = cv2.VideoCapture(1)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2_im = imutils.resize(frame, width=640)
        gray = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2GRAY)
        prev_yawn_status = yawnStatus
        cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im_rgb)

        common.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = get_output(interpreter, score_threshold=args.threshold, top_k=args.top_k)
        cv2_im = append_objs_to_img(cv2_im, objs, labels)
        rects = detector(gray, 0)
        for rect in rects:
            shape = predictor(gray, rect)
            shape = face_utils.shape_to_np(shape)
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            mouth = shape[mStart:mEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            mouEAR = mouth_aspect_ratio(mouth)
            ear = (leftEAR + rightEAR) / 2.0
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            mouthHull = cv2.convexHull(mouth)
            if ear < EYE_AR_THRESH:
                COUNTER += 1
                cv2.putText(cv2_im, "Eyes Closed ", (10, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                if COUNTER >= EYE_AR_CONSEC_FRAMES:
                    cv2.putText(cv2_im, "DROWSINESS ALERT!", (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    if not ALARM_ON:
                        ALARM_ON = True
                        threadStatusQ.put(not ALARM_ON)
                        thread = Thread(target=soundAlert, args=(sound_path, threadStatusQ,))
                        thread.daemon = True
                        thread.start()
                else:
                    ALARM_ON=False
            else:
                COUNTER = 0
                cv2.putText(cv2_im, "Eyes Open ", (10, 30),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            if mouEAR > MOU_AR_THRESH:
                cv2.putText(cv2_im, "Yawning ", (10, 70),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),2)
                yawnStatus = True
                output_text = "Yawn Count: " + str(yawns + 1)
                cv2.putText(cv2_im, output_text, (10,100),cv2.FONT_HERSHEY_SIMPLEX, 0.7,(255,0,0),2)
            else:
                yawnStatus = False
            if prev_yawn_status and not yawnStatus:
                yawns+=1
        
        cv2.imshow('frame', cv2_im)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
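# eye_aspect_ratio() and mouth_aspect_ratio() are defined elsewhere in this script.
# A sketch of the usual eye aspect ratio (Soukupová & Čech, 2016) and a comparable
# mouth ratio, assuming the 6-point eye and 20-point mouth contours produced by
# dlib's 68-point landmark predictor (the exact mouth indices are an assumption):
from scipy.spatial import distance as dist

def eye_aspect_ratio(eye):
    """EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|); drops toward 0 as the eye closes."""
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    C = dist.euclidean(eye[0], eye[3])
    return (A + B) / (2.0 * C)

def mouth_aspect_ratio(mouth):
    """Ratio of vertical lip opening to mouth width; rises when the mouth opens wide."""
    A = dist.euclidean(mouth[2], mouth[10])
    B = dist.euclidean(mouth[4], mouth[8])
    C = dist.euclidean(mouth[0], mouth[6])
    return (A + B) / (2.0 * C)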