Example no. 1
    def run(self):

        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        # Define data queues 
        if self.camera:
            q_video = device.getOutputQueue(name="cam_out", maxSize=1, blocking=False)
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=1, blocking=False)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=2, blocking=False)
                q_lm_in = device.getInputQueue(name="lm_in")
        else:
            q_pd_in = device.getInputQueue(name="pd_in")
            q_pd_out = device.getOutputQueue(name="pd_out", maxSize=4, blocking=True)
            if self.use_lm:
                q_lm_out = device.getOutputQueue(name="lm_out", maxSize=4, blocking=True)
                q_lm_in = device.getInputQueue(name="lm_in")

        self.fps = FPS(mean_nb_frames=20)

        seq_num = 0
        nb_pd_inferences = 0
        nb_lm_inferences = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0
        while True:
            self.fps.update()
            if self.camera:
                in_video = q_video.get()
                video_frame = in_video.getCvFrame()
            else:
                if self.image_mode:
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                dx = (w - self.video_size) // 2
                dy = (h - self.video_size) // 2
                video_frame = vid_frame[dy:dy+self.video_size, dx:dx+self.video_size]
                frame_nn = dai.ImgFrame()
                frame_nn.setSequenceNum(seq_num)
                frame_nn.setWidth(self.pd_input_length)
                frame_nn.setHeight(self.pd_input_length)
                frame_nn.setData(to_planar(video_frame, (self.pd_input_length, self.pd_input_length)))
                q_pd_in.send(frame_nn)
                pd_rtrip_time = now()

                seq_num += 1

            annotated_frame = video_frame.copy()

            # Get palm detection
            inference = q_pd_out.get()
            if not self.camera: glob_pd_rtrip_time += now() - pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1

            # Hand landmarks
            if self.use_lm:
                for i,r in enumerate(self.regions):
                    img_hand = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_input_length, self.lm_input_length)
                    nn_data = dai.NNData()   
                    nn_data.setLayer("input_1", to_planar(img_hand, (self.lm_input_length, self.lm_input_length)))
                    q_lm_in.send(nn_data)
                    if i == 0: lm_rtrip_time = now() # We measure only for the first region
                
                # Retrieve hand landmarks
                for i,r in enumerate(self.regions):
                    inference = q_lm_out.get()
                    if i == 0: glob_lm_rtrip_time += now() - lm_rtrip_time
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
                    nb_lm_inferences += 1

                
            self.fps.display(annotated_frame, orig=(50,50),color=(240,180,100))
            cv2.imshow("video", annotated_frame)

            key = cv2.waitKey(1) 
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_handedness = not self.show_handedness
            elif key == ord('6'):
                self.show_scores = not self.show_scores
            elif key == ord('7'):
                self.show_gesture = not self.show_gesture

        # Print some stats
        if not self.camera:
            print(f"# video files frames                 : {seq_num}")
            print(f"# palm detection inferences received : {nb_pd_inferences}")
            print(f"# hand landmark inferences received  : {nb_lm_inferences}")
            print(f"Palm detection round trip            : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms")
            print(f"Hand landmark round trip             : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms")
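
The snippet above relies on a to_planar() helper to convert OpenCV's interleaved HWC
frames into the planar CHW byte layout expected by dai.ImgFrame.setData(). A minimal
sketch of such a helper (an assumption, not necessarily the original implementation):

import cv2
import numpy as np

def to_planar(arr, shape):
    # Resize to the NN input size, then reorder HWC (interleaved) -> CHW (planar)
    resized = cv2.resize(arr, shape)
    return resized.transpose(2, 0, 1).flatten()
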
Example no. 2
    face_det_nn.passthrough.link(objectTracker.inputTrackerFrame)
    face_det_nn.out.link(objectTracker.inputDetections)
    # Send face detections to the host (for bounding boxes)

    pass_xout = pipeline.create(dai.node.XLinkOut)
    pass_xout.setStreamName("pass_out")
    objectTracker.passthroughTrackerFrame.link(pass_xout.input)

    tracklets_xout = pipeline.create(dai.node.XLinkOut)
    tracklets_xout.setStreamName("tracklets")
    objectTracker.out.link(tracklets_xout.input)
    print("Pipeline created.")
    return pipeline


with dai.Device(create_pipeline()) as device:
    frame_q = device.getOutputQueue("frame")
    tracklets_q = device.getOutputQueue("tracklets")
    pass_q = device.getOutputQueue("pass_out")
    sync = HostSync()
    while True:
        sync.add_msg("color", frame_q.get())

        # Use tracklets instead of ImgDetections in case the NN inaccurately detected a face,
        # so the blur will still be applied to all tracklets (even LOST ones)
        nn_in = tracklets_q.tryGet()
        if nn_in is not None:
            seq = pass_q.get().getSequenceNum()
            msgs = sync.get_msgs(seq)

            if 'color' not in msgs: continue
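
The loop above depends on a HostSync helper that buffers messages per stream and returns
the ones matching a given sequence number, so tracklets can be paired with the color frame
they were computed on. A rough sketch of such a synchronizer (assumed, not the original class):

class HostSync:
    def __init__(self):
        self.arrays = {}

    def add_msg(self, name, msg):
        # Buffer messages per stream name
        self.arrays.setdefault(name, []).append(msg)

    def get_msgs(self, seq):
        # Return, per stream, the buffered message whose sequence number matches,
        # dropping older messages to keep memory bounded
        ret = {}
        for name, arr in self.arrays.items():
            for i, msg in enumerate(arr):
                if msg.getSequenceNum() == seq:
                    ret[name] = msg
                    self.arrays[name] = arr[i + 1:]
                    break
        return ret
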
Example no. 3
manip = pipeline.createImageManip()
manip.setResize(300, 300)
# The NN model expects BGR input. By default, the ImageManip output type is the same as the input (grayscale in this case)
manip.setFrameType(dai.RawImgFrame.Type.BGR888p)
depth.rectifiedLeft.link(manip.inputImage)
manip.out.link(detection_nn.input)

xout_manip = pipeline.createXLinkOut()
xout_manip.setStreamName("manip")
manip.out.link(xout_manip.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
detection_nn.out.link(xout_nn.input)

device = dai.Device(pipeline)
device.startPipeline()

q_left = device.getOutputQueue(name="rect_left", maxSize=8, overwrite=True)
q_manip = device.getOutputQueue(name="manip", maxSize=8, overwrite=True)
q_depth = device.getOutputQueue(name="depth", maxSize=8, overwrite=True)
q_nn = device.getOutputQueue(name="nn", maxSize=8, overwrite=True)
q_rgb_enc = device.getOutputQueue(name="h265", maxSize=8, overwrite=True)

frame_left = None
frame_manip = None
frame_depth = None
bboxes = []


def frame_norm(frame, bbox):
    # Scale a normalized [0..1] bounding box to pixel coordinates on the frame
    return (np.array(bbox) *
            np.array([*frame.shape[:2], *frame.shape[:2]])[::-1]).astype(int)
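
frame_norm() is typically used to scale the normalized [0..1] detection coordinates coming
out of the "nn" queue back to pixel coordinates before drawing, for example (a sketch,
assuming raw_bbox holds [xmin, ymin, xmax, ymax] in normalized form):

# bbox = frame_norm(frame_manip, raw_bbox)
# cv2.rectangle(frame_manip, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)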
Example no. 4
parser.add_argument('-m',
                    '--time',
                    type=float,
                    default=float("inf"),
                    help="Finish execution after X seconds")
parser.add_argument('-af',
                    '--autofocus',
                    type=str,
                    default=None,
                    help="Set AutoFocus mode of the RGB camera",
                    choices=list(
                        filter(lambda name: name.startswith("AF_"),
                               vars(depthai.AutofocusMode))))
args = parser.parse_args()

device = depthai.Device('', False)
device.request_af_mode(
    getattr(depthai.AutofocusMode, args.autofocus,
            depthai.AutofocusMode.AF_MODE_AUTO))

dest = Path(args.path).resolve().absolute()
if dest.exists() and len(list(dest.glob('*'))) != 0 and not args.dirty:
    raise ValueError(
        f"Path {dest} contains {len(list(dest.glob('*')))} files. Either specify new path or use \"--dirty\" flag to use current one"
    )
dest.mkdir(parents=True, exist_ok=True)

p = device.create_pipeline(
    config={
        "streams": ["left", "right", "color", "disparity_color"],
        "ai": {
Example no. 5
    def startLoop(self):
        cliArgs = CliArgs()
        args = vars(cliArgs.parse_args())

        configMan = DepthConfigManager(args)
        if is_rpi and args['pointcloud']:
            raise NotImplementedError(
                "Point cloud visualization is currently not supported on RPI")
        # these are largely for debug and dev.
        cmd_file, debug_mode = configMan.getCommandFile()
        usb2_mode = configMan.getUsb2Mode()

        # decode_nn and show_nn are functions that are dependent on the neural network that's being run.
        decode_nn = configMan.decode_nn
        show_nn = configMan.show_nn

        # Labels for the current neural network. They are parsed from the blob config file.
        labels = configMan.labels
        NN_json = configMan.NN_config

        # This json file is sent to DepthAI. It communicates what options you'd like to enable and what model you'd like to run.
        config = configMan.jsonConfig

        # Create a list of enabled streams
        stream_names = [
            stream if isinstance(stream, str) else stream['name']
            for stream in configMan.stream_list
        ]

        enable_object_tracker = 'object_tracker' in stream_names

        # grab video file, if option exists
        video_file = configMan.video_file

        self.device = None
        if debug_mode:
            print('Cmd file: ', cmd_file, ' args["device_id"]: ',
                  args['device_id'])
            self.device = depthai.Device(cmd_file, args['device_id'])
        else:
            self.device = depthai.Device(args['device_id'], usb2_mode)

        print(stream_names)
        print('Available streams: ' + str(self.device.get_available_streams()))

        # create the pipeline, here is the first connection with the device
        p = self.device.create_pipeline(config=config)

        if p is None:
            print('Pipeline is not created.')
            exit(3)

        nn2depth = self.device.get_nn_to_depth_bbox_mapping()

        t_start = time()
        frame_count = {}
        frame_count_prev = {}
        nnet_prev = {}
        nnet_prev["entries_prev"] = {}
        nnet_prev["nnet_source"] = {}
        frame_count['nn'] = {}
        frame_count_prev['nn'] = {}

        NN_cams = {'rgb', 'left', 'right'}

        for cam in NN_cams:
            nnet_prev["entries_prev"][cam] = None
            nnet_prev["nnet_source"][cam] = None
            frame_count['nn'][cam] = 0
            frame_count_prev['nn'][cam] = 0

        stream_windows = []
        for s in stream_names:
            if s == 'previewout':
                for cam in NN_cams:
                    stream_windows.append(s + '-' + cam)
            else:
                stream_windows.append(s)

        for w in stream_windows:
            frame_count[w] = 0
            frame_count_prev[w] = 0

        tracklets = None

        self.reset_process_wd()

        time_start = time()

        def print_packet_info_header():
            print(
                '[hostTimestamp streamName] devTstamp seq camSrc width height Bpp'
            )

        def print_packet_info(packet, stream_name):
            meta = packet.getMetadata()
            print("[{:.6f} {:15s}]".format(time() - time_start, stream_name),
                  end='')
            if meta is not None:
                source = meta.getCameraName()
                if stream_name.startswith(
                        'disparity') or stream_name.startswith('depth'):
                    source += '(rectif)'
                print(" {:.6f}".format(meta.getTimestamp()),
                      meta.getSequenceNum(),
                      source,
                      end='')
                print('',
                      meta.getFrameWidth(),
                      meta.getFrameHeight(),
                      meta.getFrameBytesPP(),
                      end='')
            print()
            return

        def keypress_handler(self, key, stream_names):
            cams = ['rgb', 'mono']
            self.cam_idx = getattr(self, 'cam_idx', 0)  # default: 'rgb'
            cam = cams[self.cam_idx]
            cam_c = depthai.CameraControl.CamId.RGB
            cam_l = depthai.CameraControl.CamId.LEFT
            cam_r = depthai.CameraControl.CamId.RIGHT
            cmd_ae_region = depthai.CameraControl.Command.AE_REGION
            cmd_exp_comp = depthai.CameraControl.Command.EXPOSURE_COMPENSATION
            cmd_set_focus = depthai.CameraControl.Command.MOVE_LENS
            cmd_set_exp = depthai.CameraControl.Command.AE_MANUAL
            keypress_handler_lut = {
                ord('f'):
                lambda: self.device.request_af_trigger(),
                ord('1'):
                lambda: self.device.request_af_mode(depthai.AutofocusMode.
                                                    AF_MODE_AUTO),
                ord('2'):
                lambda: self.device.request_af_mode(depthai.AutofocusMode.
                                                    AF_MODE_CONTINUOUS_VIDEO),
                # 5,6,7,8,9,0: short example for using ISP 3A controls for Mono cameras
                ord('5'):
                lambda: self.device.send_camera_control(
                    cam_l, cmd_ae_region, '0 0 200 200 1'),
                ord('6'):
                lambda: self.device.send_camera_control(
                    cam_l, cmd_ae_region, '1000 0 200 200 1'),
                ord('7'):
                lambda: self.device.send_camera_control(
                    cam_l, cmd_exp_comp, '-2'),
                ord('8'):
                lambda: self.device.send_camera_control(
                    cam_l, cmd_exp_comp, '+2'),
                ord('9'):
                lambda: self.device.send_camera_control(
                    cam_r, cmd_exp_comp, '-2'),
                ord('0'):
                lambda: self.device.send_camera_control(
                    cam_r, cmd_exp_comp, '+2'),
            }
            if key in keypress_handler_lut:
                keypress_handler_lut[key]()
            elif key == ord('c'):
                if 'jpegout' in stream_names:
                    self.device.request_jpeg()
                else:
                    print(
                        "'jpegout' stream not enabled. Try setting '-s jpegout' to enable it"
                    )
            elif key == ord('s'):  # switch selected camera for manual exposure control
                self.cam_idx = (self.cam_idx + 1) % len(cams)
                print("======================= Current camera to control:",
                      cams[self.cam_idx])
            # RGB manual focus/exposure controls:
            # Control:      key[dec/inc]  min..max
            # exposure time:     i   o    1..33333 [us]
            # sensitivity iso:   k   l    100..1600
            # focus:             ,   .    0..255 [far..near]
            elif key in (ord('i'), ord('o'), ord('k'), ord('l')):
                max_exp_us = int(1000 * 1000 / config['camera'][cam]['fps'])
                self.rgb_exp = getattr(self, 'rgb_exp', 20000)  # initial
                self.rgb_iso = getattr(self, 'rgb_iso', 800)  # initial
                rgb_iso_step = 50
                rgb_exp_step = max_exp_us // 20  # split in 20 steps
                if key == ord('i'): self.rgb_exp -= rgb_exp_step
                if key == ord('o'): self.rgb_exp += rgb_exp_step
                if key == ord('k'): self.rgb_iso -= rgb_iso_step
                if key == ord('l'): self.rgb_iso += rgb_iso_step
                if self.rgb_exp < 1: self.rgb_exp = 1
                if self.rgb_exp > max_exp_us: self.rgb_exp = max_exp_us
                if self.rgb_iso < 100: self.rgb_iso = 100
                if self.rgb_iso > 1600: self.rgb_iso = 1600
                print("===================================", cam,
                      "set exposure:", self.rgb_exp, "iso:", self.rgb_iso)
                exp_arg = str(self.rgb_exp) + ' ' + str(
                    self.rgb_iso) + ' 33333'
                if cam == 'rgb':
                    self.device.send_camera_control(cam_c, cmd_set_exp,
                                                    exp_arg)
                elif cam == 'mono':
                    self.device.send_camera_control(cam_l, cmd_set_exp,
                                                    exp_arg)
                    self.device.send_camera_control(cam_r, cmd_set_exp,
                                                    exp_arg)
            elif key == ord(',') or key == ord('.'):
                self.rgb_manual_focus = getattr(self, 'rgb_manual_focus',
                                                200)  # initial
                rgb_focus_step = 3
                if key == ord(','): self.rgb_manual_focus -= rgb_focus_step
                if key == ord('.'): self.rgb_manual_focus += rgb_focus_step
                if self.rgb_manual_focus < 0: self.rgb_manual_focus = 0
                if self.rgb_manual_focus > 255: self.rgb_manual_focus = 255
                print(
                    "========================================== RGB set focus:",
                    self.rgb_manual_focus)
                focus_arg = str(self.rgb_manual_focus)
                self.device.send_camera_control(cam_c, cmd_set_focus,
                                                focus_arg)
            return

        for stream in stream_names:
            if stream in ["disparity", "disparity_color", "depth"]:
                cv2.namedWindow(stream)
                trackbar_name = 'Disparity confidence'
                conf_thr_slider_min = 0
                conf_thr_slider_max = 255
                cv2.createTrackbar(trackbar_name, stream, conf_thr_slider_min,
                                   conf_thr_slider_max,
                                   self.on_trackbar_change)
                cv2.setTrackbarPos(trackbar_name, stream,
                                   args['disparity_confidence_threshold'])

        right_rectified = None
        pcl_converter = None

        ops = 0
        prevTime = time()
        if args['verbose']: print_packet_info_header()
        while self.runThread:
            # Retrieve data from the device.
            # Data is stored in packets; nnet (Neural Network) packets have additional functions for NN result interpretation.
            self.nnet_packets, self.data_packets = p.get_available_nnet_and_data_packets(
                blocking=True)

            ### Uncomment to print ops
            # ops = ops + 1
            # if time() - prevTime > 1.0:
            #     print('OPS: ', ops)
            #     ops = 0
            #     prevTime = time()

            packets_len = len(self.nnet_packets) + len(self.data_packets)
            if packets_len != 0:
                self.reset_process_wd()
            else:
                cur_time = monotonic()
                if cur_time > wd_cutoff:
                    print("process watchdog timeout")
                    os._exit(10)

            for _, nnet_packet in enumerate(self.nnet_packets):
                if args['verbose']: print_packet_info(nnet_packet, 'NNet')

                meta = nnet_packet.getMetadata()
                camera = 'rgb'
                if meta is not None:
                    camera = meta.getCameraName()
                nnet_prev["nnet_source"][camera] = nnet_packet
                nnet_prev["entries_prev"][camera] = decode_nn(nnet_packet,
                                                              config=config,
                                                              NN_json=NN_json)
                frame_count['metaout'] += 1
                frame_count['nn'][camera] += 1

            for packet in self.data_packets:
                window_name = packet.stream_name
                if packet.stream_name not in stream_names:
                    continue  # skip streams that were automatically added
                if args['verbose']:
                    print_packet_info(packet, packet.stream_name)
                packetData = packet.getData()
                if packetData is None:
                    print('Invalid packet data!')
                    continue
                elif packet.stream_name == 'previewout':
                    meta = packet.getMetadata()
                    camera = 'rgb'
                    if meta is not None:
                        camera = meta.getCameraName()

                    window_name = 'previewout-' + camera
                    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                    # the format of the previewout image is CHW (Channel, Height, Width), but OpenCV needs HWC, so we
                    # change shape (3, 300, 300) -> (300, 300, 3)
                    data0 = packetData[0, :, :]
                    data1 = packetData[1, :, :]
                    data2 = packetData[2, :, :]
                    #                     print(packet.stream_name) # == previewout

                    frame = cv2.merge([data0, data1, data2])
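                    # Equivalently (a sketch, assuming packetData is a (3, H, W) uint8 array):
                    # frame = packetData.transpose(1, 2, 0)  # CHW -> HWC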

                    ##################################################################################################
                    ##################################################################################################

                    if nnet_prev["entries_prev"][camera] is not None:

                        frame = show_nn(nnet_prev["entries_prev"][camera],
                                        frame,
                                        NN_json=NN_json,
                                        config=config)
                        temp = nnet_prev["entries_prev"][camera]  # a list of detection dictionaries

                        #                         SEEx_frame =

                        # this will only handle one balloon I believe
                        if len(temp) > 0:
                            dict_var = temp[0]  # dictionary
                            object_id = dict_var["class_id"]

                            if object_id == 0:  # blue balloon for 0
                                balloon_color = "BLUE"
                                GPIO.output(17, True)
                                GPIO.output(4, False)
#                                 exit()

                            if object_id == 1:  # red balloon for 1
                                balloon_color = "RED"
                                GPIO.output(17, False)
                                GPIO.output(4, True)
#                                 exit()

                            x = dict_var["depth_x"]
                            y = dict_var["depth_y"]
                            z = dict_var["depth_z"]

                            print(balloon_color, x, y, z)
                        else:
                            GPIO.output(17, False)
                            GPIO.output(4, False)

##################################################################################################
##################################################################################################

                        if enable_object_tracker and tracklets is not None:
                            frame = show_tracklets(tracklets, frame, labels)
#                     cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
#                     cv2.putText(frame, "NN fps: " + str(frame_count_prev['nn'][camera]), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0))
#                     print("line 290")
#                     print(window_name)

# make copy of image
                    passed_image = np.copy(frame)
                    #
                    # Contours
                    #                     contourImage(passed_image)
                    #                     cv2.imshow("dev", image_with_lines) # show lines on a dev feed

                    # Lines
                    height = passed_image.shape[0]
                    width = passed_image.shape[1]

                    # make a canny image
                    canny_image = canny(passed_image)

                    # regions of interest
                    # must be an array of polygons
                    roi_left = np.array([[(0, height), (width, height),
                                          (int(width / 2), height),
                                          (int(width / 2), 0)]])

                    #                     roi_right = np.array([
                    #                     [(int(width/2), 0), (int(width/2), height), (width, height), (width,0)]
                    #                     ])

                    # left and right cropped images
                    cropped_left = region_of_interest(canny_image, roi_left)
                    #                     cropped_right = region_of_interest(canny_image, roi_right)

                    #                     cv2.imshow("Cropped Left", cropped_left)

                    # lines on left and right
                    left_lines = cv2.HoughLinesP(cropped_left,
                                                 100,
                                                 np.pi / 180,
                                                 100,
                                                 np.array([]),
                                                 minLineLength=5,
                                                 maxLineGap=5)
                    #                     right_lines = cv2.HoughLinesP(cropped_right, 100,np.pi/180, 10, np.array([]), minLineLength = 10, maxLineGap = 5)

                    if np.size(left_lines) > 1:
                        # averaged lines
                        ave_left_line_image = average_slope_intercept(
                            passed_image, left_lines)
                        #                     ave_right_line_image = average_slope_intercept(passed_image, right_lines)

                        # Line images
                        left_line_image = display_lines(
                            passed_image, ave_left_line_image)

    #                     right_line_image = display_lines(passed_image,ave_right_line_image)

    # Display Lines
#                         print("left ave")
#                         left_combo_image = cv2.addWeighted(passed_image, 0.8, left_line_image,1,1)
#                         cv2.imshow("LEFT", left_combo_image)

#                     if right_line_image is not None:
#                         right_combo_image = cv2.addWeighted(passed_image, 0.8, right_line_image,1,1)
#                         cv2.imshow("RIGHT", right_combo_image)

                    cv2.imshow(window_name,
                               passed_image)  # show depthai OG feed


#######################################################################################################################

                elif packet.stream_name in [
                        'left', 'right', 'disparity', 'rectified_left',
                        'rectified_right'
                ]:
                    frame_bgr = packetData
                    if args['pointcloud'] and packet.stream_name == 'rectified_right':
                        right_rectified = packetData
                    cv2.putText(frame_bgr, packet.stream_name, (25, 25),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
                    cv2.putText(frame_bgr,
                                "fps: " + str(frame_count_prev[window_name]),
                                (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                (0, 0, 0))
                    print("line 298")

                    if args['draw_bb_depth']:
                        camera = args['cnn_camera']
                        if packet.stream_name == 'disparity':
                            if camera == 'left_right':
                                camera = 'right'
                        elif camera != 'rgb':
                            camera = packet.getMetadata().getCameraName()
                        if nnet_prev["entries_prev"][camera] is not None:
                            frame_bgr = show_nn(
                                nnet_prev["entries_prev"][camera],
                                frame_bgr,
                                NN_json=NN_json,
                                config=config,
                                nn2depth=nn2depth)
                    cv2.imshow(window_name, frame_bgr)
                elif packet.stream_name.startswith(
                        'depth') or packet.stream_name == 'disparity_color':
                    frame = packetData

                    if len(frame.shape) == 2:
                        if frame.dtype == np.uint8:  # grayscale
                            cv2.putText(frame, packet.stream_name, (25, 25),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                        (0, 0, 255))
                            cv2.putText(
                                frame,
                                "fps: " + str(frame_count_prev[window_name]),
                                (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                (0, 0, 255))
                            print("line 317")
                        else:  # uint16
                            if args['pointcloud'] and "depth" in stream_names and "rectified_right" in stream_names and right_rectified is not None:
                                try:
                                    from depthai_helpers.projector_3d import PointCloudVisualizer
                                except ImportError as e:
                                    raise ImportError(
                                        f"\033[1;5;31mError occurred when importing PCL projector: {e} \033[0m "
                                    )
                                if pcl_converter is None:
                                    pcl_converter = PointCloudVisualizer(
                                        self.device.get_right_intrinsic(),
                                        1280, 720)
                                right_rectified = cv2.flip(right_rectified, 1)
                                pcl_converter.rgbd_to_projection(
                                    frame, right_rectified)
                                pcl_converter.visualize_pcd()

                            frame = (65535 // frame).astype(np.uint8)
                            # colorize depth map, comment out code below to obtain grayscale
                            frame = cv2.applyColorMap(frame, cv2.COLORMAP_HOT)
                            # frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET)
                            cv2.putText(frame, packet.stream_name, (25, 25),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
                            cv2.putText(
                                frame,
                                "fps: " + str(frame_count_prev[window_name]),
                                (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
                            print("line 336")
                    else:  # bgr
                        cv2.putText(frame, packet.stream_name, (25, 25),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                    (255, 255, 255))
                        cv2.putText(
                            frame,
                            "fps: " + str(frame_count_prev[window_name]),
                            (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
                        print("line 340")
                    if args['draw_bb_depth']:
                        camera = args['cnn_camera']
                        if camera == 'left_right':
                            camera = 'right'
                        if nnet_prev["entries_prev"][camera] is not None:
                            frame = show_nn(nnet_prev["entries_prev"][camera],
                                            frame,
                                            NN_json=NN_json,
                                            config=config,
                                            nn2depth=nn2depth)
                    cv2.imshow(window_name, frame)

                elif packet.stream_name == 'jpegout':
                    jpg = packetData
                    mat = cv2.imdecode(jpg, cv2.IMREAD_COLOR)
                    cv2.imshow('jpegout', mat)

                elif packet.stream_name == 'video':
                    videoFrame = packetData
                    videoFrame.tofile(video_file)
                    # mjpeg = packetData
                    # mat = cv2.imdecode(mjpeg, cv2.IMREAD_COLOR)
                    # cv2.imshow('mjpeg', mat)
                elif packet.stream_name == 'color':
                    meta = packet.getMetadata()
                    w = meta.getFrameWidth()
                    h = meta.getFrameHeight()
                    yuv420p = packetData.reshape((h * 3 // 2, w))
                    bgr = cv2.cvtColor(yuv420p, cv2.COLOR_YUV2BGR_IYUV)
                    scale = configMan.getColorPreviewScale()
                    bgr = cv2.resize(bgr, (int(w * scale), int(h * scale)),
                                     interpolation=cv2.INTER_AREA)
                    cv2.putText(bgr, packet.stream_name, (25, 25),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
                    cv2.putText(bgr,
                                "fps: " + str(frame_count_prev[window_name]),
                                (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                (0, 0, 0))
                    print("line 370")
                    cv2.imshow("color", bgr)

                elif packet.stream_name == 'meta_d2h':
                    str_ = packet.getDataAsStr()
                    dict_ = json.loads(str_)

                    print(
                        'meta_d2h Temp', ' CSS:' + '{:6.2f}'.format(
                            dict_['sensors']['temperature']['css']),
                        ' MSS:' + '{:6.2f}'.format(
                            dict_['sensors']['temperature']['mss']),
                        ' UPA:' + '{:6.2f}'.format(
                            dict_['sensors']['temperature']['upa0']),
                        ' DSS:' + '{:6.2f}'.format(
                            dict_['sensors']['temperature']['upa1']))
                elif packet.stream_name == 'object_tracker':
                    tracklets = packet.getObjectTracker()

                frame_count[window_name] += 1

            t_curr = time()
            if t_start + 1.0 < t_curr:
                t_start = t_curr
                # print("metaout fps: " + str(frame_count_prev["metaout"]))

                stream_windows = []
                for s in stream_names:
                    if s == 'previewout':
                        for cam in NN_cams:
                            stream_windows.append(s + '-' + cam)
                            frame_count_prev['nn'][cam] = frame_count['nn'][
                                cam]
                            frame_count['nn'][cam] = 0
                    else:
                        stream_windows.append(s)
                for w in stream_windows:
                    frame_count_prev[w] = frame_count[w]
                    frame_count[w] = 0

            key = cv2.waitKey(1)
            if key == ord('q'):
                break
            else:
                keypress_handler(self, key, stream_names)

        del p  # To stop the pipeline, the object should be deleted, otherwise the device will continue working. This is required if you are going to add code after the main loop; otherwise you can omit it.
        del self.device
        cv2.destroyAllWindows()

        # Close video output file if was opened
        if video_file is not None:
            video_file.close()

        print('py: DONE.')
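
The lane-detection helpers referenced in the previewout branch above (canny, region_of_interest,
average_slope_intercept, display_lines) are defined elsewhere in the project. As an illustration
of the masking step only, a region_of_interest() along these lines would crop the Canny edge
image to the given polygon (a sketch under that assumption):

import cv2
import numpy as np

def region_of_interest(image, polygons):
    # Keep only the edge pixels that fall inside the given polygon(s)
    mask = np.zeros_like(image)
    cv2.fillPoly(mask, polygons, 255)
    return cv2.bitwise_and(image, mask)
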
Example no. 6
    while True:
        frames_dict = in_q.get()
        if frames_dict is None:
            return
        frames_path = dest / Path(str(uuid4()))
        frames_path.mkdir(parents=False, exist_ok=False)
        for stream_name, item in frames_dict.items():
            cv2.imwrite(str(frames_path / Path(f"{stream_name}.png")), item)


store_p = Process(target=store_frames, args=(frame_q, ))
store_p.start()
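# When recording stops, the producer side can shut the worker down by sending the
# None sentinel handled above and then joining the process, e.g. (sketch):
#   frame_q.put(None)
#   store_p.join()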

# Pipeline defined; now connect to the device
try:
    with dai.Device() as device:
        cams = device.getConnectedCameras()
        depth_enabled = dai.CameraBoardSocket.LEFT in cams and dai.CameraBoardSocket.RIGHT in cams
        ps = None
        if depth_enabled:
            ps = PairingSystem()
        else:
            PairingSystem.seq_streams = []
        device.startPipeline(create_pipeline(depth_enabled))
        qControl = device.getInputQueue('control')

        ctrl = dai.CameraControl()
        if args.autofocus:
            ctrl.setAutoFocusMode(
                getattr(dai.CameraControl.AutoFocusMode, args.autofocus))
        if args.manualfocus:
Example no. 7
def test_pipeline():
    #pipeline, streams = create_rgb_cam_pipeline()
    #pipeline, streams = create_mono_cam_pipeline()
    pipeline, streams = create_stereo_depth_pipeline(source_camera)

    print("Creating DepthAI device")
    if 1:
        device = dai.Device(pipeline)
    else:  # For debug mode, with the firmware already loaded
        found, device_info = dai.XLinkConnection.getFirstDevice(
            dai.XLinkDeviceState.X_LINK_BOOTED)
        if found:
            device = dai.Device(pipeline, device_info)
        else:
            raise RuntimeError("Device not found")
    print("Starting pipeline")
    device.startPipeline()

    in_streams = []
    if not source_camera:
        # Reversed-order trick:
        # The sync stage on the device side has a timeout between receiving the left
        # and right frames. If a delay occurred on the host between sending left and
        # right, the timeout would get triggered.
        # We make sure to send the right frame first, then the left.
        in_streams.extend(['in_right', 'in_left'])
    in_q_list = []
    for s in in_streams:
        q = device.getInputQueue(s)
        in_q_list.append(q)

    # Create a receive queue for each stream
    q_list = []
    for s in streams:
        q = device.getOutputQueue(s, 8, True)
        q_list.append(q)

    # Need to set a timestamp for input frames, for the sync stage in Stereo node
    timestamp_ms = 0
    index = 0
    while True:
        # Handle input streams, if any
        if in_q_list:
            dataset_size = 2  # Number of image pairs
            frame_interval_ms = 33
            for q in in_q_list:
                name = q.getName()
                path = 'dataset/' + str(index) + '/' + name + '.png'
                data = cv2.imread(path,
                                  cv2.IMREAD_GRAYSCALE).reshape(720 * 1280)
                tstamp_ns = timestamp_ms * (1000 * 1000)
                tstamp = dai.Timestamp()
                tstamp.sec = tstamp_ns // (1000 * 1000 * 1000)
                tstamp.nsec = tstamp_ns % (1000 * 1000 * 1000)
                img = dai.ImgFrame()
                img.setData(data)
                img.setTimestamp(tstamp)
                img.setWidth(1280)
                img.setHeight(720)
                q.send(img)
                print("Sent frame: {:25s}".format(path), 'timestamp_ms:',
                      timestamp_ms)
            timestamp_ms += frame_interval_ms
            index = (index + 1) % dataset_size
            if 1:  # Optional delay between iterations, host driven pipeline
                sleep(frame_interval_ms / 1000)
        # Handle output streams
        for q in q_list:
            name = q.getName()
            image = q.get()
            #print("Received frame:", name)
            # Skip some streams for now, to reduce CPU load
            if name in ['left', 'right', 'depth']: continue
            frame = convert_to_cv2_frame(name, image)
            cv2.imshow(name, frame)
        if cv2.waitKey(1) == ord('q'):
            break

    print("Closing device")
    del device
Example no. 8
def main(args):
    """
    Main program loop.

    Parameters
    ----------
    args : command line arguments parsed by parse_arguments
    """
    # Setup PoseEstimator, pipeline, windows with sliders for PoseEstimator
    # options and load video if running on local video file
    camera = args["video"] is None
    if args["model"] not in model_list:
        raise ValueError("Unknown model '{}'".format(args["model"]))
    model_config = model_list[args["model"]]
    pose_estimator = get_poseestimator(model_config, **args)

    with dai.Device(
            create_pipeline(model_config, camera, passthrough=True,
                            **args)) as device:
        device.startPipeline()

        if camera:
            preview_queue = device.getOutputQueue("preview",
                                                  maxSize=4,
                                                  blocking=False)
        else:
            pose_in_queue = device.getInputQueue("pose_in")
        pose_queue = device.getOutputQueue("pose")
        passthrough_queue = device.getOutputQueue("passthrough")

        # Load video if given in command line and set the variables used below
        # to control FPS and looping of the video
        if not camera:
            if not os.path.exists(args["video"]):
                raise ValueError("Video '{}' does not exist.".format(
                    args["video"]))
            print("Loading video", args["video"])
            video = cv2.VideoCapture(args["video"])
            frame_interval = 1 / video.get(cv2.CAP_PROP_FPS)
            last_frame_time = 0
            frame_id = 0
        else:
            print("Running on OAK camera preview stream")

        # Create windows for the original video and the video of frames from
        # the NN passthrough. The window for the original video gets all the
        # option sliders to change pose estimator config
        video_window_name = "Original Video"
        passthrough_window_name = "Processed Video"
        video_window = SliderWindow(video_window_name)
        cv2.namedWindow(passthrough_window_name)
        video_window.add_poseestimator_options(pose_estimator, args)

        # Start main loop
        frame = None
        keypoints = None
        fps = FPS("Video", "NN", interval=0.1)
        timer = Timer("inference", "decode")
        while True:
            # Check for and handle slider changes
            slider_changes = video_window.get_changes()
            for option_name, value in slider_changes.items():
                pose_estimator.set_option(option_name, value)

            fps.start_frame()
            # Get next video frame (and submit for processing if local video)
            if camera:
                frame = preview_queue.get().getCvFrame()
                fps.count("Video")
            else:
                frame_time = time.perf_counter()
                # Only grab next frame from file at certain intervals to
                # roughly preserve its original FPS
                if frame_time - last_frame_time > frame_interval:
                    if video.grab():
                        __, frame = video.retrieve()
                        fps.count("Video")
                        last_frame_time = frame_time
                        # Create a DepthAI ImgFrame object to pass to the
                        # pose estimation NN
                        input_frame = pose_estimator.get_input_frame(frame)
                        frame_nn = dai.ImgFrame()
                        frame_nn.setSequenceNum(frame_id)
                        frame_nn.setWidth(input_frame.shape[2])
                        frame_nn.setHeight(input_frame.shape[1])
                        frame_nn.setType(dai.RawImgFrame.Type.BGR888p)
                        frame_nn.setFrame(input_frame)
                        pose_in_queue.send(frame_nn)
                        frame_id += 1
                    else:
                        frame_id = 0
                        video.set(cv2.CAP_PROP_POS_FRAMES, frame_id)

            # Process pose data whenever a new packet arrives
            if pose_queue.has():
                raw_output = pose_queue.get()
                timer.start_timer("decode")
                keypoints = pose_estimator.get_pose_data(raw_output)
                timer.stop_timer("decode")
                fps.count("NN")
                # When keypoints are available we should also have a
                # passthrough frame to process and display. Make sure it is
                # available to avoid surprises.
                if passthrough_queue.has():
                    passthrough = passthrough_queue.get()
                    timer.frame_time("inference", passthrough)
                    passthrough_frame = passthrough.getCvFrame()
                    passthrough_frame = pose_estimator.get_original_frame(
                        passthrough_frame)
                    pose_estimator.draw_results(keypoints, passthrough_frame)
                    cv2.imshow(passthrough_window_name, passthrough_frame)

            # Annotate current video frame with keypoints and FPS
            if keypoints is not None:
                pose_estimator.draw_results(keypoints, frame)
            fps.update()
            fps.display(frame)

            cv2.imshow(video_window_name, frame)

            if cv2.waitKey(1) == ord("q"):
                break
        fps.print_totals()
        timer.print_times()
        cv2.destroyAllWindows()
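
The FPS and Timer objects used above are project helpers whose exact interfaces are assumed
here. For reference, a minimal rolling FPS counter in the same spirit could be sketched as:

import collections
import time

class SimpleFPS:
    def __init__(self, window=20):
        # Keep the timestamps of the last `window` frames
        self.timestamps = collections.deque(maxlen=window)

    def update(self):
        self.timestamps.append(time.monotonic())

    def get(self):
        if len(self.timestamps) < 2:
            return 0.0
        return (len(self.timestamps) - 1) / (self.timestamps[-1] - self.timestamps[0])
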
Example no. 9
    def run(self):
        device = dai.Device(self.create_pipeline())
        device.startPipeline()

        q_video = device.getOutputQueue(name="cam_out",
                                        maxSize=1,
                                        blocking=False)
        q_pd_in = device.getInputQueue(name="pd_in")
        q_pd_out = device.getOutputQueue(name="pd_out",
                                         maxSize=4,
                                         blocking=True)
        q_lm_out = device.getOutputQueue(name="lm_out",
                                         maxSize=4,
                                         blocking=True)
        q_lm_in = device.getInputQueue(name="lm_in")
        q_asl_out = device.getOutputQueue(name="asl_out",
                                          maxSize=4,
                                          blocking=True)
        q_asl_in = device.getInputQueue(name="asl_in")

        while True:
            in_video = q_video.get()
            video_frame = in_video.getCvFrame()

            h, w = video_frame.shape[:2]
            self.frame_size = max(h, w)
            self.pad_h = int((self.frame_size - h) / 2)
            self.pad_w = int((self.frame_size - w) / 2)

            video_frame = cv2.copyMakeBorder(video_frame, self.pad_h,
                                             self.pad_h, self.pad_w,
                                             self.pad_w, cv2.BORDER_CONSTANT)

            frame_nn = dai.ImgFrame()
            frame_nn.setWidth(self.pd_input_length)
            frame_nn.setHeight(self.pd_input_length)
            frame_nn.setData(
                to_planar(video_frame,
                          (self.pd_input_length, self.pd_input_length)))
            q_pd_in.send(frame_nn)

            annotated_frame = video_frame.copy()

            # Get palm detection
            inference = q_pd_out.get()
            self.pd_postprocess(inference)

            # Send data for hand landmarks
            for i, r in enumerate(self.regions):
                img_hand = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_input_length,
                                             self.lm_input_length)
                nn_data = dai.NNData()
                nn_data.setLayer(
                    "input_1",
                    to_planar(img_hand,
                              (self.lm_input_length, self.lm_input_length)))
                q_lm_in.send(nn_data)

            # Retrieve hand landmarks
            for i, r in enumerate(self.regions):
                inference = q_lm_out.get()
                self.lm_postprocess(r, inference)
                hand_frame, handedness, hand_bbox = self.lm_render(
                    video_frame, annotated_frame, r)
                # ASL recognition
                if hand_frame is not None and self.asl_recognition:
                    hand_frame = cv2.resize(
                        hand_frame,
                        (self.asl_input_length, self.asl_input_length),
                        interpolation=cv2.INTER_NEAREST)
                    hand_frame = hand_frame.transpose(2, 0, 1)
                    nn_data = dai.NNData()
                    nn_data.setLayer("input", hand_frame)
                    q_asl_in.send(nn_data)
                    asl_result = np.array(q_asl_out.get().getFirstLayerFp16())
                    asl_idx = np.argmax(asl_result)
                    # Recognized ASL character is associated with a probability
                    asl_char = [
                        characters[asl_idx],
                        round(asl_result[asl_idx] * 100, 1)
                    ]
                    selected_char = asl_char
                    current_char_queue = None
                    if handedness > 0.5:
                        current_char_queue = self.right_char_queue
                    else:
                        current_char_queue = self.left_char_queue
                    current_char_queue.append(selected_char)
                    # Perform filtering of the recognition results using the previous 5 results.
                    # If there aren't enough results, take the first result as output.
                    if len(current_char_queue) < 5:
                        selected_char = current_char_queue[0]
                    else:
                        char_candidate = {}
                        for i in range(5):
                            if current_char_queue[i][0] not in char_candidate:
                                char_candidate[current_char_queue[i][0]] = [
                                    1, current_char_queue[i][1]
                                ]
                            else:
                                char_candidate[current_char_queue[i][0]][0] += 1
                                char_candidate[current_char_queue[i][0]][1] += current_char_queue[i][1]
                        most_voted_char = ""
                        max_votes = 0
                        most_voted_char_prob = 0
                        for key in char_candidate:
                            if char_candidate[key][0] > max_votes:
                                max_votes = char_candidate[key][0]
                                most_voted_char = key
                                most_voted_char_prob = round(
                                    char_candidate[key][1] /
                                    char_candidate[key][0], 1)
                        selected_char = (most_voted_char, most_voted_char_prob)

                    if self.show_asl:
                        gesture_string = "Letter: " + selected_char[
                            0] + ", " + str(selected_char[1]) + "%"
                        textSize = self.ft.getTextSize(gesture_string,
                                                       fontHeight=14,
                                                       thickness=-1)[0]
                        cv2.rectangle(video_frame,
                                      (hand_bbox[0] - 5, hand_bbox[1]),
                                      (hand_bbox[0] + textSize[0] + 5,
                                       hand_bbox[1] - 18), (36, 152, 0), -1)
                        self.ft.putText(img=video_frame,
                                        text=gesture_string,
                                        org=(hand_bbox[0], hand_bbox[1] - 5),
                                        fontHeight=14,
                                        color=(255, 255, 255),
                                        thickness=-1,
                                        line_type=cv2.LINE_AA,
                                        bottomLeftOrigin=True)

            video_frame = video_frame[self.pad_h:self.pad_h + h,
                                      self.pad_w:self.pad_w + w]
            cv2.imshow("hand tracker", video_frame)
            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_hand_box = not self.show_hand_box
            elif key == ord('2'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('3'):
                self.show_asl = not self.show_asl
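
The hand-rolled voting over the last five ASL results above could equivalently be expressed
with collections.Counter; a sketch of the same idea (assuming the queue holds
(character, probability) pairs as in the code above):

from collections import Counter

def majority_vote(char_queue):
    # Most frequent character among the buffered results, with its average probability
    counts = Counter(c for c, _ in char_queue)
    winner, _ = counts.most_common(1)[0]
    probs = [p for c, p in char_queue if c == winner]
    return winner, round(sum(probs) / len(probs), 1)
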
Example no. 10
         Path('face-detection-retail-0004.blob')).resolve().absolute()))
cam_rgb.preview.link(detection_nn.input)

xout_rgb = pipeline.createXLinkOut()
xout_rgb.setStreamName("rgb")
cam_rgb.preview.link(xout_rgb.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
detection_nn.out.link(xout_nn.input)

found, device_info = depthai.XLinkConnection.getFirstDevice(
    depthai.XLinkDeviceState.X_LINK_UNBOOTED)
if not found:
    raise RuntimeError("Device not found")
device = depthai.Device(pipeline, device_info)
device.startPipeline()

q_rgb = device.getOutputQueue("rgb")
q_nn = device.getOutputQueue("nn")

frame = None
bboxes = []


def frame_norm(frame, bbox):
    return (np.array(bbox) *
            np.array([*frame.shape[:2], *frame.shape[:2]])[::-1]).astype(int)


while True:
Example no. 11
    str((Path(__file__).parent /
         Path('face-detection-retail-0004.blob')).resolve().absolute()))
detection_nn.setConfidenceThreshold(0.5)
cam_rgb.preview.link(detection_nn.input)

xout_rgb = pipeline.createXLinkOut()
xout_rgb.setStreamName("rgb")
cam_rgb.preview.link(xout_rgb.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
detection_nn.out.link(xout_nn.input)

# The pipeline is now finished, and we need to find an available device to run it.
# We use a context manager here, which will dispose of the device after we stop using it.
with depthai.Device(pipeline) as device:
    q_rgb = device.getOutputQueue("rgb")
    q_nn = device.getOutputQueue("nn")

    frame = None
    detections = []

    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
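    # Usage sketch (assuming `d` is a detection with normalized coordinates, e.g. a
    # dai.ImgDetection): bbox = frameNorm(frame, (d.xmin, d.ymin, d.xmax, d.ymax))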

    while True:
        in_rgb = q_rgb.tryGet()
        in_nn = q_nn.tryGet()
Example no. 12
if args.show_preview:
    cv2.namedWindow('disparity', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('disparity', int(color_width / 2), int(color_height / 2))

    cv2.namedWindow('disparity th', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('disparity th', int(color_width / 2),
                     int(color_height / 2))

    cv2.namedWindow('rgb', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('rgb', int(color_width / 2), int(color_height / 2))

    cv2.namedWindow('combo', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('combo', int(color_width / 2), int(color_height / 2))

# Pipeline defined; now connect to the device
with dai.Device(pipeline, usb2Mode=args.force_usb2) as device:
    # Start pipeline
    device.startPipeline()

    if (args.show_preview or args.write_preview) and args.disparity:
        # Output queue will be used to get the disparity frames from the output defined above
        #q_rgb  = device.getOutputQueue(name="rgb",		maxSize=4,	blocking=False)
        q_dep = device.getOutputQueue(name="disparity",
                                      maxSize=4,
                                      blocking=False)

    # Output queue will be used to get the encoded data from the output defined above
    if args.rgb:
        q_265c = device.getOutputQueue(name="h265_rgb",
                                       maxSize=30,
                                       blocking=False)
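
Encoded packets from a queue like q_265c are typically appended to a raw bitstream file on
the host and later wrapped into a playable container; a sketch of that pattern (the file name
and loop are assumptions, not part of the original script):

with open('color.h265', 'wb') as f:
    try:
        while True:
            # Each packet is a chunk of the raw H.265 bitstream
            f.write(q_265c.get().getData().tobytes())
    except KeyboardInterrupt:
        pass
# The raw stream can then be containerized, e.g.:
#   ffmpeg -framerate 30 -i color.h265 -c copy color.mp4
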
Example no. 13
def run_record():
    # Record from all available devices
    with contextlib.ExitStack() as stack:
        device_infos = dai.Device.getAllAvailableDevices()

        if len(device_infos) == 0:
            raise RuntimeError("No devices found!")
        else:
            print("Found", len(device_infos), "devices")

        recordings = []
        # TODO: allow users to specify which available devices should record
        for device_info in device_infos:
            openvino_version = dai.OpenVINO.Version.VERSION_2021_4
            usb2_mode = False
            device = stack.enter_context(
                dai.Device(openvino_version, device_info, usb2_mode))

            # Create recording object for this device
            recording = Record(save_path, device)
            # Set recording configuration
            # TODO: add support for specifying resolution
            recording.set_fps(args.fps)
            recording.set_timelapse(args.timelapse)
            recording.set_save_streams(args.save)
            recording.set_quality(EncodingQuality[args.quality])
            recording.set_preview(args.display)
            recording.start()

            recordings.append(recording)

        queues = [q for recording in recordings for q in recording.queues]
        frame_counter = 0
        start_time = time.time()
        timelapse = 0
        while True:
            try:
                for q in queues:
                    if 0 < args.timelapse and time.time(
                    ) - timelapse < args.timelapse:
                        continue
                    new_msg = q['q'].tryGet()
                    if new_msg is not None:
                        q['msgs'].append(new_msg)
                        if check_sync(queues, new_msg.getTimestamp()):
                            # Wait for Auto focus/exposure/white-balance
                            if time.time() - start_time < 1.5: continue
                            # Timelapse
                            if 0 < args.timelapse: timelapse = time.time()
                            if args.frame_cnt == frame_counter:
                                raise KeyboardInterrupt
                            frame_counter += 1

                            for recording in recordings:
                                frames = {}
                                for stream in recording.queues:
                                    frames[stream['name']] = stream[
                                        'msgs'].pop(0).getCvFrame()
                                    if stream['name'] == 'preview':
                                        cv2.imshow(q['mxid'],
                                                   frames[stream['name']])
                                        del frames[stream['name']]
                                recording.frame_q.put(frames)
                # Avoid lazy looping
                time.sleep(0.001)
                if cv2.waitKey(1) == ord('q'):
                    break
            except KeyboardInterrupt:
                break

        for recording in recordings:
            recording.frame_q.put(None)
            recording.process.join()  # Wait for the recording process to finish
        print("All recordings have stopped. Exiting program")
Example no. 14
0
    def thread_function(self, name):
        device = None
        if self.debug_mode:
            print('Cmd file: ', self.cmd_file, ' args["device_id"]: ',
                  self.args['device_id'])
            device = depthai.Device(self.cmd_file, self.args['device_id'])
        else:
            device = depthai.Device(self.args['device_id'], self.usb2_mode)

        print('Available streams: ' + str(device.get_available_streams()))

        # create the pipeline, here is the first connection with the device
        self.pipeline = device.create_pipeline(config=self.config)

        if self.pipeline is None:
            print('Pipeline is not created.')
            exit(3)

        # add nn2depth to a parameter so clients can get at it to decode depth?
        # setup a param for sharing the depth mapping for this particular device.
        nn2depth = device.get_nn_to_depth_bbox_mapping()
        nn2depthStr = json.dumps(nn2depth)
        self.nn2depthParamName = "nn2depth" + self.targetDev
        paramDefault = ""
        paramDesc = ParameterDescriptor(type=ParameterType.PARAMETER_STRING,
                                        description="Used ")
        self.declare_parameter(self.nn2depthParamName, nn2depthStr, paramDesc)

        t_start = time()
        frame_count = {}
        frame_count_prev = {}
        nnet_prev = {}
        nnet_prev["entries_prev"] = {}
        nnet_prev["nnet_source"] = {}
        frame_count['nn'] = {}
        frame_count_prev['nn'] = {}

        NN_cams = {'rgb', 'left', 'right'}

        for cam in NN_cams:
            nnet_prev["entries_prev"][cam] = []
            nnet_prev["nnet_source"][cam] = []
            frame_count['nn'][cam] = 0
            frame_count_prev['nn'][cam] = 0
        #-------------------------------------------------------

        stream_windows = []
        for s in self.stream_names:
            if s == 'previewout':
                for cam in NN_cams:
                    stream_windows.append(s + '-' + cam)
            else:
                stream_windows.append(s)

        for w in stream_windows:
            frame_count[w] = 0
            frame_count_prev[w] = 0

        tracklets = None

        #-------------------------------------------------------
        # start watchdog
        reset_process_wd()
        #-------------------------------------------------------

        while True:
            # retrieve data from the device
            # data is stored in packets; nnet (neural network) packets have additional functions for interpreting NN results
            nnet_packets, data_packets = self.pipeline.get_available_nnet_and_data_packets(
                True)

            #-------------------------------------------------------
            # TODO: should move the watchdog stuff to a shared class
            #-------------------------------------------------------
            packets_len = len(nnet_packets) + len(data_packets)
            if packets_len != 0:
                reset_process_wd()
            else:
                cur_time = monotonic()
                if cur_time > wd_cutoff:
                    print("process watchdog timeout")
                    os._exit(10)
            #-------------------------------------------------------

            for _, nnet_packet in enumerate(nnet_packets):
                meta = nnet_packet.getMetadata()
                camera = 'rgb'
                if meta != None:
                    camera = meta.getCameraName()

                serializedEntry = self.decode_nn(nnet_packet,
                                                 config=self.config,
                                                 NN_json=self.NN_json)

                self.nnmsg.data = str(serializedEntry)
                self.nnResultPublisher.publish(self.nnmsg)

                frame_count['metaout'] += 1
                frame_count['nn'][camera] += 1

            for packet in data_packets:
                window_name = packet.stream_name
                if packet.stream_name not in self.stream_names:
                    continue  # skip streams that were automatically added
                packetData = packet.getData()
                if packetData is None:
                    print('Invalid packet data!')
                    continue
                elif packet.stream_name == 'previewout':
                    #broadcast to previewout
                    meta = packet.getMetadata()
                    camera = 'rgb'
                    if meta != None:
                        camera = meta.getCameraName()

                    window_name = 'previewout-' + camera
                    # the format of previewout image is CHW (Channel, Height, Width), but OpenCV needs HWC, so we
                    # change shape (3, 300, 300) -> (300, 300, 3)
                    data0 = packetData[0, :, :]
                    data1 = packetData[1, :, :]
                    data2 = packetData[2, :, :]
                    frame = cv2.merge([data0, data1, data2])

                    self.publishFrame(frame, self.previewPublisher,
                                      self.previewmsg)

                elif packet.stream_name == 'left':
                    frame_bgr = packetData
                    self.publishFrame(frame_bgr, self.leftPublisher,
                                      self.leftmsg)
                elif packet.stream_name == 'right':
                    frame_bgr = packetData
                    self.publishFrame(frame_bgr, self.rightPublisher,
                                      self.rightmsg)
                elif packet.stream_name == 'disparity':
                    frame_bgr = packetData
                    self.publishFrame(frame_bgr, self.disparityPublisher,
                                      self.disparitymsg)

                elif packet.stream_name.startswith('depth'):
                    frame = packetData
                    self.publishFrame(frame, self.depthPublisher,
                                      self.depthmsg)

                # TODO: maybe just publish the filepath?
                elif packet.stream_name == 'jpegout':
                    jpg = packetData
                    mat = cv2.imdecode(jpg, cv2.IMREAD_COLOR)
                    cv2.imshow('jpegout', mat)

                elif packet.stream_name == 'video':
                    videoFrame = packetData
                    videoFrame.tofile(video_file)
                    #mjpeg = packetData
                    #mat = cv2.imdecode(mjpeg, cv2.IMREAD_COLOR)
                    #cv2.imshow('mjpeg', mat)

                elif packet.stream_name == 'meta_d2h':
                    str_ = packet.getDataAsStr()
                    self.d2hmsg.data = str_
                    self.d2hPublisher.publish(self.d2hmsg)

                elif packet.stream_name == 'object_tracker':
                    tracklets = packet.getObjectTracker()

                frame_count[window_name] += 1

            key = cv2.waitKey(1)
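
reset_process_wd() and the wd_cutoff global are referenced above but not defined in this snippet. A minimal sketch of such a watchdog helper (the 5-second interval is an assumed value):

from time import monotonic

wd_interval = 5.0  # seconds without any packets before the watchdog fires (assumed value)

def reset_process_wd():
    global wd_cutoff
    wd_cutoff = monotonic() + wd_interval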
Example no. 15
0
stereo.syncedRight.link(xoutRight.input)
stereo.disparity.link(xoutDisparity.input)
if depth:
    stereo.depth.link(xoutDepth.input)
if outRectified:
    stereo.rectifiedLeft.link(xoutRectifLeft.input)
    stereo.rectifiedRight.link(xoutRectifRight.input)

streams = ["left", "right"]
if outRectified:
    streams.extend(["rectifiedLeft", "rectifiedRight"])
streams.append("disparity")
if depth:
    streams.append("depth")

calibData = dai.Device().readCalibration()
leftMesh, rightMesh = getMesh(calibData)
if generateMesh:
    meshLeft = list(leftMesh.tobytes())
    meshRight = list(rightMesh.tobytes())
    stereo.loadMeshData(meshLeft, meshRight)

if meshDirectory is not None:
    saveMeshFiles(leftMesh, rightMesh, meshDirectory)


print("Creating DepthAI device")
with dai.Device(pipeline) as device:
    # Create a receive queue for each stream
    qList = [device.getOutputQueue(stream, 8, blocking=False) for stream in streams]
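
The snippet stops right after the receive queues are created. A hedged sketch of the usual polling and display loop over those queues (display only; raw depth will render as a dark 16-bit image):

    while True:
        for q in qList:
            name = q.getName()
            frame = q.get().getCvFrame()
            cv2.imshow(name, frame)
        if cv2.waitKey(1) == ord('q'):
            break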
Example no. 16
0
    return np.vstack((x.flatten(), y.flatten(), np.ones_like(x.flatten())))


def cvt_to_bgr(packet):
    meta = packet.getMetadata()
    w = meta.getFrameWidth()
    h = meta.getFrameHeight()
    # print((h, w))
    packetData = packet.getData()
    yuv420p = packetData.reshape((h * 3 // 2, w))
    return cv2.cvtColor(yuv420p, cv2.COLOR_YUV2BGR_IYUV)


curr_dir = str(Path('.').resolve().absolute())

device = depthai.Device("", args.force_usb2)
pipeline = device.create_pipeline(
    config={
        'streams': ['right', 'depth', 'color', 'left'],
        'ai': {
            "blob_file":
            str(
                Path('./mobilenet-ssd/mobilenet-ssd.blob').resolve().absolute(
                )),
        },
        'camera': {
            'mono': {
                'resolution_h': 720,
                'fps': 30
            },
            'rgb': {
Example no. 17
0
#!/usr/bin/env python3

import depthai as dai

# Connect device
with dai.Device(dai.OpenVINO.VERSION_2021_4, dai.UsbSpeed.HIGH) as device:

    try:
        device.factoryResetCalibration()
        print(f'Factory reset calibration OK')
    except Exception as ex:
        print(f'Factory reset calibration FAIL: {ex}')
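
As a hedged companion snippet (not part of the example above), the calibration currently stored on the device can be read back and inspected like this:

import depthai as dai

with dai.Device() as device:
    calibData = device.readCalibration()
    # Dump the EEPROM contents as JSON for inspection
    print(calibData.eepromToJson())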
Example no. 18
0
# Re-use the initial values for multiplier/addend
script.outputs['mean'].link(nn.inputs['mean'])
nn.inputs['mean'].setWaitForMessage(False)

script.outputs['scale'].link(nn.inputs['scale'])
nn.inputs['scale'].setWaitForMessage(False)
# Always wait for the new frame before starting inference
camRgb.preview.link(nn.inputs['frame'])

# Send normalized frame values to host
nn_xout = p.createXLinkOut()
nn_xout.setStreamName("nn")
nn.out.link(nn_xout.input)

# Pipeline is defined, now we can connect to the device
with dai.Device(p) as device:
    qNn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)
    shape = (3, SHAPE, SHAPE)
    while True:
        inNn = np.array(qNn.get().getData())
        # Get back the frame. It's currently normalized to -0.5 - 0.5
        frame = inNn.view(np.float16).reshape(shape).transpose(1, 2, 0)
        # To get the original frame back (0-255), we multiply all frame values (pixels) by 255 and then add 127.5
        frame = (frame * 255.0 + 127.5).astype(np.uint8)
        # Show the initial frame
        cv2.imshow("Original frame", frame)

        if cv2.waitKey(1) == ord('q'):
            break
Example no. 19
0
    def create_system_logger(self):
        self.nodes.system_logger = self.p.createSystemLogger()
        self.nodes.system_logger.setRate(1)

        if len(conf.args.report) > 0:
            self.nodes.xout_system_logger = self.p.createXLinkOut()
            self.nodes.xout_system_logger.setStreamName("system_logger")
            self.nodes.system_logger.out.link(
                self.nodes.xout_system_logger.input)


device_info = conf.getDeviceInfo()

# Pipeline is defined, now we can connect to the device
with dai.Device(dai.OpenVINO.Version.VERSION_2021_3, device_info) as device:
    conf.adjustParamsToDevice(device)

    nn_manager = NNetManager(model_name=conf.getModelName(),
                             model_dir=conf.getModelDir(),
                             source=conf.getModelSource(),
                             use_depth=conf.useDepth,
                             use_hq=conf.useHQ)

    pm = PipelineManager(nn_manager)
    cap = cv2.VideoCapture(conf.args.video) if not conf.useCamera else None
    fps = FPSHandler() if conf.useCamera else FPSHandler(cap)

    if conf.useCamera:
        pv = PreviewManager(fps)
Example no. 20
0
xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
detection_nn.out.link(xout_nn.input)

# MobilenetSSD label texts
texts = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
         "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

for text in texts:
    (Path(__file__).parent / Path(f'data/{text}')).mkdir(parents=True, exist_ok=True)
(Path(__file__).parent / Path(f'data/raw')).mkdir(parents=True, exist_ok=True)


# Pipeline defined, now connect to the device
with dai.Device() as device, open('data/dataset.csv', 'w') as dataset_file:
    dataset = csv.DictWriter(
        dataset_file,
        ["timestamp", "label", "left", "top", "right", "bottom", "raw_frame", "overlay_frame", "cropped_frame"]
    )
    dataset.writeheader()

    # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height
    def frame_norm(frame, bbox):
        norm_vals = np.full(len(bbox), frame.shape[0])
        norm_vals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * norm_vals).astype(int)


    def store_data(in_frame, detections):
        timestamp = int(time.time() * 10000)
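
store_data() is truncated at this point. A hedged sketch of how it might continue, reusing the CSV columns and per-label folders created above (assuming the detections are ImgDetection objects with a label index and normalized bbox; file naming is illustrative):

        raw_path = str(Path(__file__).parent / Path(f'data/raw/{timestamp}.jpg'))
        cv2.imwrite(raw_path, in_frame)
        for detection in detections:
            label = texts[detection.label]
            bbox = frame_norm(in_frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
            cropped_path = str(Path(__file__).parent / Path(f'data/{label}/{timestamp}.jpg'))
            cv2.imwrite(cropped_path, in_frame[bbox[1]:bbox[3], bbox[0]:bbox[2]])
            dataset.writerow({
                "timestamp": timestamp,
                "label": label,
                "left": bbox[0], "top": bbox[1], "right": bbox[2], "bottom": bbox[3],
                "raw_frame": raw_path,
                "overlay_frame": "",  # overlay rendering omitted in this sketch
                "cropped_frame": cropped_path,
            })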
Example no. 21
0
        def startLoop(self):
                cliArgs = CliArgs()
                args = vars(cliArgs.parse_args())

                configMan = DepthConfigManager(args)
                if is_rpi and args['pointcloud']:
                        raise NotImplementedError("Point cloud visualization is currently not supported on RPI")
                # these are largely for debug and dev.
                cmd_file, debug_mode = configMan.getCommandFile()
                usb2_mode = configMan.getUsb2Mode()

                # decode_nn and show_nn are functions that are dependent on the neural network that's being run.
                decode_nn = configMan.decode_nn
                show_nn = configMan.show_nn

                # Labels for the current neural network. They are parsed from the blob config file.
                labels = configMan.labels
                NN_json = configMan.NN_config

                # This json file is sent to DepthAI. It communicates what options you'd like to enable and what model you'd like to run.
                config = configMan.jsonConfig

                # Create a list of enabled streams ()
                stream_names = [stream if isinstance(stream, str) else stream['name'] for stream in configMan.stream_list]

                enable_object_tracker = 'object_tracker' in stream_names

                # grab video file, if option exists
                video_file = configMan.video_file


                self.device = None
                if debug_mode:
                        print('Cmd file: ', cmd_file, ' args["device_id"]: ', args['device_id'])
                        self.device = depthai.Device(cmd_file, args['device_id'])
                else:
                        self.device = depthai.Device(args['device_id'], usb2_mode)

                print(stream_names)
                print('Available streams: ' + str(self.device.get_available_streams()))

                # create the pipeline, here is the first connection with the device
                p = self.device.create_pipeline(config=config)

                if p is None:
                        print('Pipeline is not created.')
                        exit(3)


                nn2depth = self.device.get_nn_to_depth_bbox_mapping()

                t_start = time()
                frame_count = {}
                frame_count_prev = {}
                nnet_prev = {}
                nnet_prev["entries_prev"] = {}
                nnet_prev["nnet_source"] = {}
                frame_count['nn'] = {}
                frame_count_prev['nn'] = {}

                NN_cams = {'rgb', 'left', 'right'}

                for cam in NN_cams:
                        nnet_prev["entries_prev"][cam] = None
                        nnet_prev["nnet_source"][cam] = None
                        frame_count['nn'][cam] = 0
                        frame_count_prev['nn'][cam] = 0

                stream_windows = []
                for s in stream_names:
                        if s == 'previewout':
                                for cam in NN_cams:
                                        stream_windows.append(s + '-' + cam)
                        else:
                                stream_windows.append(s)

                for w in stream_windows:
                        frame_count[w] = 0
                        frame_count_prev[w] = 0

                tracklets = None

                self.reset_process_wd()

                time_start = time()
                def print_packet_info(packet):
                        meta = packet.getMetadata()
                        print("[{:.6f} {:15s}]".format(time()-time_start, packet.stream_name), end='')
                        if meta is not None:
                                print(" {:.6f}".format(meta.getTimestamp()), meta.getSequenceNum(), end='')
                                if not (packet.stream_name.startswith('disparity')
                                         or packet.stream_name.startswith('depth')):
                                        print('', meta.getCameraName(), end='')
                        print()
                        return

                for stream in stream_names:
                        if stream in ["disparity", "disparity_color", "depth"]:
                                cv2.namedWindow(stream)
                                trackbar_name = 'Disparity confidence'
                                conf_thr_slider_min = 0
                                conf_thr_slider_max = 255
                                cv2.createTrackbar(trackbar_name, stream, conf_thr_slider_min, conf_thr_slider_max, self.on_trackbar_change)
                                cv2.setTrackbarPos(trackbar_name, stream, args['disparity_confidence_threshold'])

                right_rectified = None
                pcl_not_set = True


                # ops = 0
                # prevTime = time()
                # task  = False

                if is_rpi and args['motor']:
                    """
                    Insert Code for App 2 Here
                    """
                if args['cnn_model'] == "app2":
                    """
                    Insert Code for App 2 Here
                    """
                try:
                    pickle_file_data = read_pickle("face_encodings")
                except:
                    pickle_file_data = []
                self.schedule_task_()
                if is_rpi:
                    """
                    Insert Code for App 1 Here
                    """
                while self.runThread:
                        

                        # retrieve data from the device
                        # data is stored in packets; nnet (neural network) packets have additional functions for interpreting NN results
                        self.nnet_packets, self.data_packets = p.get_available_nnet_and_data_packets(blocking=True)
                        detected_label = []

                        ### Uncomment to print ops
                        # ops = ops + 1
                        # if time() - prevTime > 1.0:
                        #     print('OPS: ', ops)
                        #     ops = 0
                        #     prevTime = time()

                        packets_len = len(self.nnet_packets) + len(self.data_packets)
                        if packets_len != 0:
                                self.reset_process_wd()
                        else:
                                cur_time=monotonic()
                                if cur_time > wd_cutoff:
                                        print("process watchdog timeout")
                                        os._exit(10)

                        for _, nnet_packet in enumerate(self.nnet_packets):
                                if args['verbose']: print_packet_info(nnet_packet)

                                meta = nnet_packet.getMetadata()
                                camera = 'rgb'
                                if meta != None:
                                        camera = meta.getCameraName()
                                nnet_prev["nnet_source"][camera] = nnet_packet
                                nnet_prev["entries_prev"][camera] = decode_nn(nnet_packet, config=config, NN_json=NN_json)
                                frame_count['metaout'] += 1
                                frame_count['nn'][camera] += 1

                        for packet in self.data_packets:
                                window_name = packet.stream_name
                                if packet.stream_name not in stream_names:
                                        continue # skip streams that were automatically added
                                if args['verbose']: print_packet_info(packet)
                                packetData = packet.getData()
                                if packetData is None:
                                        print('Invalid packet data!')
                                        continue
                                elif packet.stream_name == 'previewout':
                                        meta = packet.getMetadata()
                                        camera = 'rgb'
                                        if meta != None:
                                                camera = meta.getCameraName()

                                        window_name = 'previewout-' + camera
                                        # the format of previewout image is CHW (Channel, Height, Width), but OpenCV needs HWC, so we
                                        # change shape (3, 300, 300) -> (300, 300, 3)
                                        data0 = packetData[0,:,:]
                                        data1 = packetData[1,:,:]
                                        data2 = packetData[2,:,:]
                                        frame = cv2.merge([data0, data1, data2])

                                        if nnet_prev["entries_prev"][camera] is not None:
                                                try:
                                                    
                                                    global task_run_motor, task_play_sound, task_start_led
                                                    if args['cnn_model2'] == "app5_landmark":
                                                         """
                                                        Insert Code for App 5 Here
                                                        """
                                                         
                                                    elif args['cnn_model'] == "app4":
                                                        """
                                                        Insert Code for App 4 Here
                                                        """
                                                    elif args['social_distance']:
                                                        """
                                                        Insert Code for App 2 Here
                                                        """
                                                    else:
                                                            frame, detected_label = show_nn(nnet_prev["entries_prev"][camera], frame, NN_json=NN_json, config=config)
                                                            print(detected_label)
                                                    for label in detected_label:
                                                            

                                                            if str(label) == "mask" and is_rpi and args['motor'] and task_run_motor:
                                                                """
                                                                Insert Code for App 3 Here
                                                                """
                                                            if str(label) == "person" and args['play_sound'] and task_play_sound:
                                                                """
                                                                Insert Code for App 1 Here
                                                                """
                                                            if is_rpi and args['cnn_model'] == "app1" and str(label):
                                                                """
                                                                Insert Code for App 1 Here
                                                                """
                                                                    
                                                                    
                                                except:
                                                        
                                                        frame = show_nn(nnet_prev["entries_prev"][camera], frame, NN_json=NN_json, config=config)

                                                if enable_object_tracker and tracklets is not None:
                                                    frame = show_tracklets(tracklets, frame, labels)
                                        #cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),2)
                                        cv2.putText(frame, "NN fps: " + str(frame_count_prev['nn'][camera]), (2, frame.shape[0]-4), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0))
                                        cv2.imshow(window_name, frame)
                                elif packet.stream_name in ['left', 'right', 'disparity', 'rectified_left', 'rectified_right']:
                                        frame_bgr = packetData
                                        if args['pointcloud'] and packet.stream_name == 'rectified_right':
                                                right_rectified = packetData
                                        cv2.putText(frame_bgr, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
                                        cv2.putText(frame_bgr, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
                                        camera = None
                                        if args['draw_bb_depth']:
                                                camera = args['cnn_camera']
                                                if packet.stream_name == 'disparity':
                                                        if camera == 'left_right':
                                                                camera = 'right'
                                                elif camera != 'rgb':
                                                        camera = packet.getMetadata().getCameraName()
                                                if nnet_prev["entries_prev"][camera] is not None:
                                                        frame_bgr, detected_label = show_nn(nnet_prev["entries_prev"][camera], frame_bgr,
                                                                                                                                NN_json=NN_json,
                                                                                                 config=config, nn2depth=nn2depth)
                                        cv2.imshow(window_name, frame_bgr)
                                elif packet.stream_name.startswith('depth') or packet.stream_name == 'disparity_color':
                                        frame = packetData

                                        if len(frame.shape) == 2:
                                                if frame.dtype == np.uint8: # grayscale
                                                        cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
                                                        cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255))
                                                else: # uint16
                                                        if args['pointcloud'] and "depth" in stream_names and "rectified_right" in stream_names and right_rectified is not None:
                                                                if pcl_not_set:
                                                                        pcl_converter = PointCloudVisualizer(self.device.get_right_intrinsic(), 1280, 720)
                                                                        pcl_not_set =  False
                                                                right_rectified = cv2.flip(right_rectified, 1)
                                                                pcd = pcl_converter.rgbd_to_projection(frame, right_rectified)
                                                                pcl_converter.visualize_pcd()

                                                        frame = (65535 // frame).astype(np.uint8)
                                                        #colorize depth map, comment out code below to obtain grayscale
                                                        frame = cv2.applyColorMap(frame, cv2.COLORMAP_HOT)
                                                        # frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET)
                                                        cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
                                                        cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)
                                        else: # bgr
                                                cv2.putText(frame, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255))
                                                cv2.putText(frame, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 255)

                                        if args['draw_bb_depth']:
                                                camera = args['cnn_camera']
                                                if camera == 'left_right':
                                                        camera = 'right'
                                                if nnet_prev["entries_prev"][camera] is not None:
                                                        frame, detected_label = show_nn(nnet_prev["entries_prev"][camera], frame, NN_json=NN_json,
                                                                                         config=config, nn2depth=nn2depth)
                                        cv2.imshow(window_name, frame)

                                elif packet.stream_name == 'jpegout':
                                        jpg = packetData
                                        mat = cv2.imdecode(jpg, cv2.IMREAD_COLOR)
                                        cv2.imshow('jpegout', mat)

                                elif packet.stream_name == 'video':
                                        videoFrame = packetData
                                        videoFrame.tofile(video_file)
                                        #mjpeg = packetData
                                        #mat = cv2.imdecode(mjpeg, cv2.IMREAD_COLOR)
                                        #cv2.imshow('mjpeg', mat)
                                elif packet.stream_name == 'color':
                                        meta = packet.getMetadata()
                                        w = meta.getFrameWidth()
                                        h = meta.getFrameHeight()
                                        yuv420p = packetData.reshape( (h * 3 // 2, w) )
                                        bgr = cv2.cvtColor(yuv420p, cv2.COLOR_YUV2BGR_IYUV)
                                        scale = configMan.getColorPreviewScale()
                                        bgr = cv2.resize(bgr, ( int(w*scale), int(h*scale) ), interpolation = cv2.INTER_AREA)
                                        cv2.putText(bgr, packet.stream_name, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
                                        cv2.putText(bgr, "fps: " + str(frame_count_prev[window_name]), (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0))
                                        cv2.imshow("color", bgr)

                                elif packet.stream_name == 'meta_d2h':
                                        str_ = packet.getDataAsStr()
                                        dict_ = json.loads(str_)

                                        print('meta_d2h Temp',
                                                ' CSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['css']),
                                                ' MSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['mss']),
                                                ' UPA:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa0']),
                                                ' DSS:' + '{:6.2f}'.format(dict_['sensors']['temperature']['upa1']))
                                elif packet.stream_name == 'object_tracker':
                                        tracklets = packet.getObjectTracker()

                                frame_count[window_name] += 1

                        t_curr = time()
                        if t_start + 1.0 < t_curr:
                                t_start = t_curr
                                # print("metaout fps: " + str(frame_count_prev["metaout"]))

                                stream_windows = []
                                for s in stream_names:
                                        if s == 'previewout':
                                                for cam in NN_cams:
                                                        stream_windows.append(s + '-' + cam)
                                                        frame_count_prev['nn'][cam] = frame_count['nn'][cam]
                                                        frame_count['nn'][cam] = 0
                                        else:
                                                stream_windows.append(s)
                                for w in stream_windows:
                                        frame_count_prev[w] = frame_count[w]
                                        frame_count[w] = 0

                        key = cv2.waitKey(1)
                        if key == ord('c'):
                                if 'jpegout' not in stream_names:
                                        print("'jpegout' stream not enabled. Try setting -s jpegout to enable it")
                                else:
                                        self.device.request_jpeg()
                        elif key == ord('f'):
                                self.device.request_af_trigger()
                        elif key == ord('1'):
                                self.device.request_af_mode(depthai.AutofocusMode.AF_MODE_AUTO)
                        elif key == ord('2'):
                                self.device.request_af_mode(depthai.AutofocusMode.AF_MODE_CONTINUOUS_VIDEO)
                        elif key == ord('a'):
                            user_name = input()
                            add_face = True
                        elif key == ord('q'):
                                break
                    

                del p  # To stop the pipeline, the pipeline object must be deleted; otherwise the device keeps running. This is only needed if you add code after the main loop; otherwise you can omit it.
                del self.device

                # Close video output file if was opened
                if video_file is not None:
                        video_file.close()

                print('py: DONE.')
Example no. 22
0
    def setup(self):
        '''!
            Configures the OAK stage.

            Creates the pipeline for the OAK, 
            creating all the nodes and substage nodes needed,
            linking them, initializing the device 
            and creating the queues.

            Also gets the calibration data
        '''

        self._config_lock = True

        if self._device is not None:
            return

        pipeline = dai.Pipeline()
        self.pipeline = pipeline

        self.depth = self._configs["depth"]
        self.nodes = {}

        self._set_preview_size()
        self._create_cameras(pipeline)
        self._create_substages_nodes(pipeline) 

        #Instantiate depth
        using_nn = False
        for node in self.nodes:
            if isinstance(self.nodes[node], dai.node.NeuralNetwork):
                using_nn = True
                break

        self._create_depth(pipeline, using_nn)
        self._create_manips(pipeline)
        self._link(pipeline)

        device = dai.Device(pipeline)
        self._device = device
        device.startPipeline()

        self._create_queues(device)
        self._read_calibration(device)

        # Set data and context values

        self.set_context("frame_id", self._configs["frame_id"])
        self.set_context("output_queues", self.oak_output_queue)
        self.set_context("input_queues", self.oak_input_queue)
        self.set_context("camera_intrinsics", self.camera_intrinsics)
        self.set_context("camera_calibration_size", self.camera_calibration_size)
        self.set_context("get3DPosition", self.get3DPosition)
        
        self.data = {}
        self.frame = {}

        if OAK_Stage.COLOR in self.oak_output_queue:
            self.data[OAK_Stage.COLOR] = None
            self.frame[OAK_Stage.COLOR] = None
        if OAK_Stage.DEPTH in self.oak_output_queue: 
            self.data[OAK_Stage.DEPTH] = None
            self.frame[OAK_Stage.DEPTH] = None
        
        self.set_context("data", self.data)
        self.set_context("frame", self.frame)
Example no. 23
0
    return np.vstack((x.flatten(), y.flatten(), np.ones_like(x.flatten())))


def cvt_to_bgr(packet):
    meta = packet.getMetadata()
    w = meta.getFrameWidth()
    h = meta.getFrameHeight()
    # print((h, w))
    packetData = packet.getData()
    yuv420p = packetData.reshape((h * 3 // 2, w))
    return cv2.cvtColor(yuv420p, cv2.COLOR_YUV2BGR_IYUV)


curr_dir = str(Path('.').resolve().absolute())

device = depthai.Device("", False)
pipeline = device.create_pipeline(
    config={
        'streams': ['right', 'depth', 'color', 'left'],
        'ai': {
            "blob_file":
            str(
                Path('./mobilenet-ssd/mobilenet-ssd.blob').resolve().absolute(
                )),
        },
        'camera': {
            'mono': {
                'resolution_h': 720,
                'fps': 30
            },
            'rgb': {
Example no. 24
0
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    colorCam.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)
spatialDetectionNetwork.boundingBoxMapping.link(xoutBoundingBoxDepthMapping.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)

x_coordinates_set = []
y_coordinates_set = []
z_coordinates_set = []

# Pipeline is defined, now we can connect to the device
with dai.Device(pipeline, device_info) as device:
    # Start pipeline
    device.startPipeline()

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    xoutBoundingBoxDepthMapping = device.getOutputQueue(name="boundingBoxDepthMapping", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    frame = None
    detections = []

    startTime = time.monotonic()
    counter = 0
    fps = 0
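
The example is cut off before its read loop. A hedged sketch of how the spatial detections are typically consumed (SpatialImgDetections expose metric coordinates; the "rgb" window name matches the preview queue above):

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        frame = inPreview.getCvFrame()
        for detection in inDet.detections:
            # spatialCoordinates are given in millimetres relative to the camera
            x_coordinates_set.append(detection.spatialCoordinates.x)
            y_coordinates_set.append(detection.spatialCoordinates.y)
            z_coordinates_set.append(detection.spatialCoordinates.z)
        cv2.imshow("rgb", frame)
        if cv2.waitKey(1) == ord('q'):
            break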
Example no. 25
0
videoEncoder.setDefaultProfilePreset(1920, 1080, 30, dai.VideoEncoderProperties.Profile.H265_MAIN)

nn.setConfidenceThreshold(0.5)
nn.setBlobPath(nnPath)
nn.setNumInferenceThreads(2)
nn.input.setBlocking(False)

# Linking
camRgb.video.link(videoEncoder.input)
camRgb.preview.link(xoutRgb.input)
camRgb.preview.link(nn.input)
videoEncoder.bitstream.link(videoOut.input)
nn.out.link(nnOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device, open('video.h265', 'wb') as videoFile:

    # Queues
    queue_size = 8
    qRgb = device.getOutputQueue("rgb", queue_size)
    qDet = device.getOutputQueue("nn", queue_size)
    qRgbEnc = device.getOutputQueue('h265', maxSize=30, blocking=True)

    frame = None
    detections = []

    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
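
The snippet ends at the helper definition. A hedged sketch of a main loop that drains the encoder queue into the video.h265 file opened above while keeping the preview responsive:

    while True:
        # Drain the encoder queue and append the raw H.265 bitstream to video.h265
        while qRgbEnc.has():
            qRgbEnc.get().getData().tofile(videoFile)
        inRgb = qRgb.tryGet()
        if inRgb is not None:
            cv2.imshow("rgb", inRgb.getCvFrame())
        if cv2.waitKey(1) == ord('q'):
            break

The resulting file is a raw elementary stream; it can later be wrapped into a playable container (for example with ffmpeg) on the host.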
Example no. 26
0
This code can be helpful to get started with experiments involving depth information.
The beauty of OAK-D is that it makes generating disparity maps remarkably easy.

Usage:
    python3 code3.py
"""

import sys
sys.path.append('/home/pi/depthai/')
import consts.resource_paths
import cv2
import depthai
import numpy as np

device = depthai.Device()

out_h, out_w = [400, 640]
output_file = "disparity_color.avi"
fps = 30
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out_wr = cv2.VideoWriter(output_file, fourcc, fps, (out_w, out_h))

if not device:
    raise RuntimeError("Error initializing device. Try to reset it.")

pipeline = device.create_pipeline(
    config={
        'streams': [
            'left', 'right', 'metaout', {
                'name': 'disparity_color',
Example no. 27
0
def test_pipeline():
    print("Creating DepthAI device")
    with dai.Device() as device:
        cams = device.getConnectedCameras()
        depth_enabled = dai.CameraBoardSocket.LEFT in cams and dai.CameraBoardSocket.RIGHT in cams
        if depth_enabled:
            pipeline, streams = create_stereo_depth_pipeline(source_camera)
        else:
            pipeline, streams = create_rgb_cam_pipeline()
        #pipeline, streams = create_mono_cam_pipeline()

        print("Starting pipeline")
        device.startPipeline(pipeline)

        in_streams = []
        if not source_camera:
            # Reversed order trick:
            # The sync stage on device side has a timeout between receiving left
            # and right frames. In case a delay would occur on host between sending
            # left and right, the timeout will get triggered.
            # We make sure to send first the right frame, then left.
            in_streams.extend(['in_right', 'in_left'])
        in_q_list = []
        inStreamsCameraID = []
        for s in in_streams:
            q = device.getInputQueue(s)
            in_q_list.append(q)
            inStreamsCameraID = [
                dai.CameraBoardSocket.RIGHT, dai.CameraBoardSocket.LEFT
            ]

        # Create a receive queue for each stream
        q_list = []
        for s in streams:
            q = device.getOutputQueue(s, 8, blocking=False)
            q_list.append(q)

        # Need to set a timestamp for input frames, for the sync stage in Stereo node
        timestamp_ms = 0
        index = 0
        while True:
            # Handle input streams, if any
            if in_q_list:
                dataset_size = 2  # Number of image pairs
                frame_interval_ms = 33
                for i, q in enumerate(in_q_list):
                    name = q.getName()
                    path = 'dataset/' + str(index) + '/' + name + '.png'
                    data = cv2.imread(path,
                                      cv2.IMREAD_GRAYSCALE).reshape(720 * 1280)
                    tstamp = datetime.timedelta(seconds=timestamp_ms // 1000,
                                                milliseconds=timestamp_ms %
                                                1000)
                    img = dai.ImgFrame()
                    img.setData(data)
                    img.setTimestamp(tstamp)
                    img.setInstanceNum(inStreamsCameraID[i])
                    img.setType(dai.ImgFrame.Type.RAW8)
                    img.setWidth(1280)
                    img.setHeight(720)
                    q.send(img)
                    if timestamp_ms == 0:  # Send twice for first iteration
                        q.send(img)
                    print("Sent frame: {:25s}".format(path), 'timestamp_ms:',
                          timestamp_ms)
                timestamp_ms += frame_interval_ms
                index = (index + 1) % dataset_size
                if 1:  # Optional delay between iterations, host driven pipeline
                    sleep(frame_interval_ms / 1000)
            # Handle output streams
            for q in q_list:
                name = q.getName()
                image = q.get()
                #print("Received frame:", name)
                # Skip some streams for now, to reduce CPU load
                if name in ['left', 'right', 'depth']: continue
                frame = convert_to_cv2_frame(name, image)
                cv2.imshow(name, frame)
            if cv2.waitKey(1) == ord('q'):
                break
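
convert_to_cv2_frame() is not included in this snippet. A rough sketch covering only the streams displayed above (it assumes 8-bit 1280x720 mono output; the full helper would also handle depth and RGB conversion):

def convert_to_cv2_frame(name, image):
    # Reinterpret the flat ImgFrame payload as a 720p single-channel image
    frame = np.array(image.getData()).astype(np.uint8).reshape((720, 1280))
    if name == 'disparity':
        # Stretch disparity values for display and colorize them
        frame = cv2.normalize(frame, None, 0, 255, cv2.NORM_MINMAX)
        frame = cv2.applyColorMap(frame, cv2.COLORMAP_JET)
    return frame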
Example no. 28
0
videoOut = pipeline.createXLinkOut()
videoOut.setStreamName('h265')
videoEncoder.bitstream.link(videoOut.input)

# Create empty lists
qRgb_list = []
output_path_list = []
videoFile_list = []
csvFile_list = []
csvWriter_list = []
n_oak = 0

with contextlib.ExitStack() as stack:
    # look for connected Depthai devices
    for device_info in dai.Device.getAllAvailableDevices():
        device = stack.enter_context(dai.Device(pipeline, device_info))
        print("Connected to " + device_info.getMxId())
        # start pipeline
        device.startPipeline()
        # create output filename
        output_path_list.append(out_prefix + str(n_oak))
        # Output queue will be used to get the rgb frames from the output defined above
        qRgb = device.getOutputQueue(name="h265", maxSize=fps, blocking=False)
        qRgb_list.append(qRgb)
        n_oak += 1

    # for each OAK found, create H265 and csv file
    for i in range(0, n_oak):
        videoFile_list.append(open(output_path_list[i] + '.h265', 'wb'))
        csvFile_list.append(open(output_path_list[i] + ".csv", mode='a'))
        csvWriter_list.append(
Example no. 29
0
xoutDepth.setStreamName("depth")

# MonoCamera
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)
monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT)
# monoLeft.setFps(5.0)
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_720_P)
monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT)
# monoRight.setFps(5.0)

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)
stereo.depth.link(xoutDepth.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)

    while True:
        # Blocking call, will wait until new data has arrived
        inDepth = depthQueue.get()
        frame = inDepth.getFrame()

        # frame is ready to be shown
        cv2.imshow("depth", frame)

        if cv2.waitKey(1) == ord('q'):
            break
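
The raw depth frame above is 16-bit and typically looks very dark in imshow. A hedged variant of the display step that colorizes it for easier inspection (the "depth color" window name is illustrative):

        # Normalize the 16-bit depth to 8 bits and apply a colormap before showing it
        depth_vis = cv2.normalize(frame, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
        depth_vis = cv2.applyColorMap(depth_vis, cv2.COLORMAP_JET)
        cv2.imshow("depth color", depth_vis)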
Example no. 30
0
        )
        if config['streams'].count('video') == 1:
            config['streams'].remove('video')

# Create a list of enabled streams ()
stream_names = [
    stream if isinstance(stream, str) else stream['name']
    for stream in stream_list
]

enable_object_tracker = 'object_tracker' in stream_names

device = None
if debug_mode:
    print('Cmd file: ', cmd_file, ' args["device_id"]: ', args['device_id'])
    device = depthai.Device(cmd_file, args['device_id'])
else:
    device = depthai.Device(args['device_id'], usb2_mode)

print('Available streams: ' + str(device.get_available_streams()))

# create the pipeline, here is the first connection with the device
p = device.create_pipeline(config=config)

if p is None:
    print('Pipeline is not created.')
    exit(3)

nn2depth = device.get_nn_to_depth_bbox_mapping()

t_start = time()