def _track_in_frame(self, frame, method="camshift"):
    self._last_frame = frame
    if self._ever_detected:
        roi_for_tracking = self.get_roi_to_use(frame)
        mask = self.create_hand_mask(frame)
        x, y, w, h = roi_for_tracking
        track_window = tuple(roi_for_tracking)
        # set up the ROI for tracking
        roi = roi_for_tracking.extract_from_frame(frame)
        if self._debug:
            print(roi_for_tracking)
            cv2.imshow("DEBUG: HandDetection_lib: _track_in_frame (frame_roied)", roi)
        # if the frame is only 1 channel
        if len(frame.shape) == 2 or (len(frame.shape) == 3 and frame.shape[2] == 1):
            hsv_roi = cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
            hsv_roi = cv2.cvtColor(hsv_roi, cv2.COLOR_RGB2HSV)
            hsv = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
            hsv = cv2.cvtColor(hsv, cv2.COLOR_BGR2HSV)
        else:
            hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.)))
        roi_mask = mask[y:y + h, x:x + w]
        if self._debug:
            cv2.imshow("DEBUG: HandDetection_lib: follow (ROI extracted mask)", roi_mask)
        roi_hist = cv2.calcHist([hsv_roi], [0], roi_mask, [180], [0, 180])
        cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
        # Setup the termination criteria: either 10 iterations or move by at least 1 pt
        term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)
        # apply meanshift/camshift to get the new location
        if method == "meanshift":
            tracked, new_track_window = cv2.meanShift(dst, track_window, term_crit)
            self._tracked = (tracked != 0)
        else:
            rotated_rect, new_track_window = cv2.CamShift(dst, track_window, term_crit)
            intersection_rate = roi_for_tracking.intersection_rate(Roi(new_track_window))
            if intersection_rate and roi_for_tracking != Roi(new_track_window):
                self._tracked = True
            else:
                self._tracked = False
        if self._tracked:
            self.tracking_roi = Roi(new_track_window)
    else:
        self._tracked = False
def get_roi_to_use(self, frame):
    """
    Calculate the roi to be used depending on the situation of the hand (initial, detected, tracked)
    :param frame:
    :return:
    """
    current_roi = None
    if self._detected:
        current_roi = self.detection_roi
    else:
        # If we have already failed to detect, we use the tracking_roi or the extended_roi
        if self._consecutive_detection_fails > 0:
            if self._tracked:
                current_roi = self.tracking_roi
            else:
                current_roi = self.extended_roi
        else:
            # Not detected and no consecutive fails on detection.
            # It's probably the first time we try to detect.
            # If no initial_roi is given, the central 50% of the frame is used.
            if self.initial_roi is not None and self.initial_roi != Roi():
                current_roi = self.initial_roi
            else:
                current_roi = Roi.from_frame(frame, SIDE.CENTER, 50)
    assert current_roi != Roi(), "hand can't be detected on a %s roi of the frame" % str(current_roi)
    return current_roi
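# A minimal usage sketch (assumed, not part of the library) of the fallback order implemented
# above: detection_roi when detected, tracking_roi/extended_roi after failed detections,
# otherwise initial_roi, and finally a centered Roi covering 50% of the frame.
import numpy as np

hand = Hand(detector=None)              # hypothetical instance; the detector is not used here
frame = np.zeros((480, 640), np.uint8)  # dummy 640x480 single-channel frame
roi = hand.get_roi_to_use(frame)        # no detections yet and no initial_roi set,
                                        # so this should equal Roi([160, 120, 320, 240])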
def test_roi_from_frame_left(self):
    r = Roi.from_frame(frame, SIDE.LEFT, 100)
    self.assertEqual(r, [0, 0, 640, 480])
    r = Roi.from_frame(frame, SIDE.LEFT, 90)
    self.assertEqual(r, [0, 0, 640 - 64, 480])
    r = Roi.from_frame(frame, SIDE.LEFT, 10)
    self.assertEqual(r, [0, 0, 64, 480])
def test_roi_from_frame_center(self):
    r = Roi.from_frame(frame, SIDE.CENTER, 100)
    self.assertEqual(r, [0, 0, 640, 480])
    r = Roi.from_frame(frame, SIDE.CENTER, 90)
    self.assertEqual(r, [(64 / 2), (48 / 2), 640 - 64, 480 - 48])
    r = Roi.from_frame(frame, SIDE.CENTER, 10)
    self.assertEqual(r, [640 / 2 - 64 / 2, 480 / 2 - 48 / 2, 64, 48])
def extended_roi(self, value):
    assert all(isinstance(n, (int, float)) for n in value) or isinstance(value, Roi), \
        "extended_roi must be of the type Roi"
    if isinstance(value, Roi):
        self._extended_roi = value
    else:
        self._extended_roi = Roi(value)
    # Extended_roi must be limited to the initial_roi
    self._extended_roi.limit_to_roi(self.initial_roi)
def test_roi_from_frame_bottom(self):
    r = Roi.from_frame(frame, SIDE.BOTTOM, 100)
    self.assertEqual(r, [0, 0, 640, 480])
    r = Roi.from_frame(frame, SIDE.BOTTOM, 90)
    self.assertEqual(r, [0, 48, 640, 480 - 48])
    r = Roi.from_frame(frame, SIDE.BOTTOM, 10)
    self.assertEqual(r, [0, 480 - 48, 640, 48])
def test_roi_from_frame_right(self):
    r = Roi.from_frame(frame, SIDE.RIGHT, 100)
    self.assertEqual(r, [0, 0, 640, 480])
    r = Roi.from_frame(frame, SIDE.RIGHT, 90)
    self.assertEqual(r, [64, 0, 640 - 64, 480])
    r = Roi.from_frame(frame, SIDE.RIGHT, 10)
    self.assertEqual(r, [640 - 64, 0, 64, 480])
def test_roi_from_frame_top(self):
    """
    Test that checks the creation of a roi from a frame
    """
    r = Roi.from_frame(frame, SIDE.TOP, 100)
    self.assertEqual(r, [0, 0, 640, 480])
    r = Roi.from_frame(frame, SIDE.TOP, 90)
    self.assertEqual(r, [0, 0, 640, 480 - 48])
    r = Roi.from_frame(frame, SIDE.TOP, 10)
    self.assertEqual(r, [0, 0, 640, 48])
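# The assertions above pin down the geometry of Roi.from_frame: the given percentage of the
# frame's width and/or height is kept, anchored to the requested side. A sketch consistent
# with those assertions (an assumed reimplementation for illustration, not the library code;
# SIDE is the enum used throughout this file):
def from_frame_sketch(frame, side, percent):
    """Return [x, y, w, h] covering `percent` of the frame, anchored to `side`."""
    fh, fw = frame.shape[:2]
    w = fw * percent // 100 if side in (SIDE.LEFT, SIDE.RIGHT, SIDE.CENTER) else fw
    h = fh * percent // 100 if side in (SIDE.TOP, SIDE.BOTTOM, SIDE.CENTER) else fh
    x = {SIDE.LEFT: 0, SIDE.TOP: 0, SIDE.BOTTOM: 0,
         SIDE.RIGHT: fw - w, SIDE.CENTER: (fw - w) // 2}[side]
    y = {SIDE.LEFT: 0, SIDE.RIGHT: 0, SIDE.TOP: 0,
         SIDE.BOTTOM: fh - h, SIDE.CENTER: (fh - h) // 2}[side]
    return [x, y, w, h]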
def test_hand_detect(self):
    """
    Test hand detection and tracking on a set of depth images
    """
    hand = Hand()
    hand.depth_threshold = 130
    expected_results = {
        "20190725130248.png": [False, False],
        "20190725130249.png": [False, False],
        "20190725130250.png": [True, True],
        "20190725130251.png": [False, True],  # fingers too close, so it's considered not a hand
        "20190725130252.png": [True, True],
        "20190725130253.png": [True, True],
        "20190725130254.png": [False, True],
        "20190725130255.png": [False, True],
        "20190725130256.png": [False, True],
        "20190725130257.png": [False, True],
        "20190725130258.png": [False, True]
    }
    full_path = "/home/robolab/robocomp/components/robocomp-robolab/components/detection/handDetection/src/images/depth_images"
    for file in sorted(os.listdir(full_path)):
        if file.endswith(".png") and file in expected_results:
            frame = RGBDFrame(cv2.imread(os.path.join(full_path, file), 0))
            hand.initial_roi = Roi.from_frame(frame, SIDE.CENTER, 50)
            hand.detect_and_track(frame)
            print("testing file %s" % file)
            self.assertEqual(hand.detected, expected_results[file][0])
            self.assertEqual(hand.tracked, expected_results[file][1])
            frame = self.draw_in_frame(hand, frame)
            cv2.imshow("final", frame)
            key = cv2.waitKey(5000)
            if key == 112:
                while cv2.waitKey(1000) != 112:
                    pass
def agglomerate(setup, iteration, sample, thresholds, output_basenames, *args, **kwargs):

    thresholds = list(thresholds)

    aff_data_dir = os.path.join(os.getcwd(), 'processed', setup, str(iteration))
    affs_filename = os.path.join(aff_data_dir, sample + '.hdf')
    gt_data_dir = os.path.join(os.getcwd(), '../01_data')
    gt_filename = os.path.join(gt_data_dir, sample + '.hdf')

    print "Agglomerating " + sample + " with " + setup + ", iteration " + str(iteration) + " at thresholds " + str(thresholds)

    print "Reading affinities..."
    with h5py.File(affs_filename, 'r') as affs_file:
        affs = np.array(affs_file['volumes/predicted_affs'])
        affs_offset_nm = Coordinate(affs_file['volumes/predicted_affs'].attrs['offset'])
        resolution = Coordinate(affs_file['volumes/predicted_affs'].attrs['resolution'])
        affs_roi = Roi(affs_offset_nm, resolution * affs.shape[1:])
        print "affs ROI: " + str(affs_roi)

    print "Reading ignore mask..."
    with h5py.File(gt_filename, 'r') as gt_file:
        ignore_mask = np.array(gt_file['volumes/labels/ignore'])

    start = time.time()
    agglomerate_lineages(affs, ignore_mask, thresholds, affs_roi, resolution, output_basenames, **kwargs)
    print "Finished agglomeration in " + str(time.time() - start) + "s"
def initial_roi(self, value):
    # assert all(isinstance(n, (int, float)) for n in value) or isinstance(value, Roi), "initial_roi must be of the type Roi"
    if isinstance(value, Roi):
        self._initial_roi = value
    else:
        self._initial_roi = Roi(value)
    self.extended_roi = self._initial_roi
def __init__(self, config, attack_loader):
    # Data loader
    self.attack_loader = attack_loader

    # Models
    self.model_net = None
    self.optimizer = None
    self.img_ch = config.img_ch
    self.output_ch = config.output_ch
    self.objective = config.objective
    self.criterion = torch.nn.CrossEntropyLoss()
    self.augmentation_prob = config.augmentation_prob
    self.config = config
    self.roi = Roi()

    # Hyper-parameters
    self.lr = config.lr
    self.beta1 = config.beta1
    self.beta2 = config.beta2
    print("@@@@@@@@@@@@@@@@@@@@@@@ LR B1 & B2 for Adam ------> ", self.lr, self.beta1, self.beta2)

    # Training settings
    self.num_epochs = config.num_epochs
    self.num_epochs_decay = config.num_epochs_decay
    self.batch_size = config.batch_size

    # Step size
    self.log_step = config.log_step
    # self.val_step = config.val_step
    self.val_step = 1000000

    # Path
    self.model_path = config.model_path
    self.result_path = config.result_path

    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.model_type = config.model_type
    self.objective = config.objective
    self.build_model()
def create_piece(self):
    # pion (pawn)
    y = 76
    x = 1
    for pion in range(8):
        pion = Pion("assets/pion_b.png", x, y)
        x += 75
        self.pion_b.append(pion)
    y = 451
    x = 1
    for pion in range(8):
        pion = Pion("assets/pion_n.png", x, y)
        x += 75
        self.pion_n.append(pion)
    # tour (rook)
    self.tour_b.append(Tour("assets/tour_b.png", 1, 1))
    self.tour_b.append(Tour("assets/tour_b.png", 526, 1))
    self.tour_n.append(Tour("assets/tour_n.png", 1, 526))
    self.tour_n.append(Tour("assets/tour_n.png", 526, 526))
    # cavalier (knight)
    self.cavalier_b.append(Cavalier("assets/cavalier_b.png", 76, 1))
    self.cavalier_b.append(Cavalier("assets/cavalier_b.png", 451, 1))
    self.cavalier_n.append(Cavalier("assets/cavalier_n.png", 76, 526))
    self.cavalier_n.append(Cavalier("assets/cavalier_n.png", 451, 526))
    # fou (bishop)
    self.fou_b.append(Fou("assets/fou_b.png", 151, 1))
    self.fou_b.append(Fou("assets/fou_b.png", 376, 1))
    self.fou_n.append(Fou("assets/fou_n.png", 151, 526))
    self.fou_n.append(Fou("assets/fou_n.png", 376, 526))
    # roi (king)
    self.roi_b.append(Roi("assets/roi_b.png", 226, 1))
    self.roi_n.append(Roi("assets/roi_n.png", 301, 526))
    # reine (queen)
    self.reine_b.append(Reine("assets/reine_b.png", 301, 1))
    self.reine_n.append(Reine("assets/reine_n.png", 226, 526))
def __init__(self):
    super().__init__()
    # Inputs
    self.i_avg_valid = Signal()
    self.i_frame_done = Signal()
    self.i_x = Signal(9)
    self.i_y = Signal(9)
    self.i_roi = Roi()
    # Outputs
    self.o_min = Rgb565()
    self.o_max = Rgb565()
    self.o_avg = Rgb565()
def __init__(self, dw=8):
    # Parameters
    self.dw = dw
    # Inputs
    self.i_p = Signal(dw)
    self.i_valid = Signal()
    self.i_clear = Signal()
    self.i_bin = Signal(dw)
    self.i_x = Signal(9)
    self.i_y = Signal(9)
    self.i_roi = Roi()
    # Outputs
    self.o_ready = Signal()
    self.o_val = Signal(18)
def __init__(self, detector):
    """
    Hand class attributes values.
    """
    self._detector = detector
    self._id = None
    self._fingertips = []
    self._intertips = []
    self._center_of_mass = None
    self._finger_distances = []
    self._average_defect_distance = []
    self._contour = None
    self._consecutive_tracking_fails = 0
    self._consecutive_detection_fails = 0
    self._frame_count = 0
    self._color = get_random_color()
    self._confidence = 0
    self._tracked = False
    self._detected = False
    self._detection_status = 0
    self._position_history = []
    # The region of the image where the hand is expected to be located when initialized or lost
    self._initial_roi = Roi()
    # The region where the hand has been detected the last time
    self._detection_roi = Roi()
    # The region where the hand was tracked the last time
    self._tracking_roi = Roi()
    # Region extended from tracking_roi to a maximum of initial_roi to look for the hand
    self._extended_roi = Roi()
    self._mask_mode = MASKMODES.COLOR
    self._debug = True
    self._depth_threshold = -1
    self._last_frame = None
    self._ever_detected = False
def update_hand_with_contour(self, hand_contour):
    """
    Attributes of the hand are calculated from the hand contour.
    TODO: calculate a truth value
    A score of 100 is the maximum value for the hand truth. This value is calculated like this:
    A hand is expected to have 5 finger tips, 4 intertips and a center of mass
    :param hand_contour: calculated contour that is expected to describe a hand
    :return: None
    """
    hull2 = cv2.convexHull(hand_contour, returnPoints=False)
    # Get defect points
    defects = cv2.convexityDefects(hand_contour, hull2)

    if defects is not None:
        estimated_fingertips_coords, \
        estimated_fingertips_indexes, \
        estimated_intertips_coords, \
        estimated_intertips_indexes = self._calculate_fingertips(hand_contour, defects)

        is_hand = self.is_hand(estimated_fingertips_coords, estimated_intertips_coords, strict=True)
        if is_hand:
            self._fingertips = estimated_fingertips_coords
            self._intertips = estimated_intertips_coords
            if len(estimated_fingertips_coords) == 5:
                fingers_contour = np.take(hand_contour,
                                          estimated_fingertips_indexes + estimated_intertips_indexes,
                                          axis=0,
                                          mode="wrap")
                bounding_rect, hand_circle, self._contour = self.get_hand_bounding_rect_from_fingers(
                    hand_contour, fingers_contour)
                # detection roi is set to the bounding rect of the fingers upscaled 20 pixels
                # self.detection_roi = Roi(bounding_rect)
                self.detection_roi = Roi(bounding_rect).upscaled(
                    Roi.from_frame(self._last_frame, SIDE.CENTER, 100), 10)
                if self._debug:
                    to_show = self._last_frame.copy()
                    cv2.drawContours(to_show, [hand_contour], -1, (255, 255, 255), 2)
                    cv2.drawContours(to_show, [fingers_contour], -1, (200, 200, 200), 2)
                    to_show = self.detection_roi.draw_on_frame(to_show)
                    # cv2.rectangle(to_show, (self.detection_roi.y, self.detection_roi.x), (self.detection_roi.y + self.detection_roi.height, self.detection_roi.x + self.detection_roi.width), [255, 255, 0])
                    # (x, y, w, h) = cv2.boundingRect(hand_contour)
                    # cv2.rectangle(to_show, (self.detection_roi.y, self.detection_roi.x), (self.detection_roi.x + self.detection_roi.height, self.detection_roi.x + self.detection_roi.width), [255, 255, 0])
                    cv2.imshow("update_hand_with_contour", to_show)
                self._detected = True
                self._detection_status = 1
                self._ever_detected = True
                self._confidence = 100
            else:
                self._detection_status = -1
                self._detected = False
                self._confidence = 0
                return
        else:
            self._detection_status = -1
            self._detected = False
            self._confidence = 0
            return

        # Find moments of the largest contour
        moments = cv2.moments(hand_contour)
        center_of_mass = None
        finger_distances = []
        average_defect_distance = None
        # Central mass of first order moments
        if moments['m00'] != 0:
            cx = int(moments['m10'] / moments['m00'])  # cx = M10/M00
            cy = int(moments['m01'] / moments['m00'])  # cy = M01/M00
            center_of_mass = (cx, cy)
            self._center_of_mass = center_of_mass
            self._position_history.append(center_of_mass)

        if center_of_mass is not None and len(estimated_intertips_coords) > 0:
            # Distance from each finger defect (finger webbing) to the center mass
            distance_between_defects_to_center = []
            for far in estimated_intertips_coords:
                x = np.array(far)
                center_mass_array = np.array(center_of_mass)
                distance = np.sqrt(
                    np.power(x[0] - center_mass_array[0], 2) +
                    np.power(x[1] - center_mass_array[1], 2))
                distance_between_defects_to_center.append(distance)
            # Get an average of the three shortest distances from finger webbing to center mass
            sorted_defects_distances = sorted(distance_between_defects_to_center)
            average_defect_distance = np.mean(sorted_defects_distances[0:2])
            self._average_defect_distance = average_defect_distance

        # # Get fingertip points from contour hull
        # # If points are in proximity of 80 pixels, consider as a single point in the group
        # finger = []
        # for i in range(0, len(hull) - 1):
        #     if (np.absolute(hull[i][0][0] - hull[i + 1][0][0]) > 10) or (
        #             np.absolute(hull[i][0][1] - hull[i + 1][0][1]) > 10):
        #         if hull[i][0][1] < 500:
        #             finger.append(hull[i][0])
        #
        # # The fingertip points are 5 hull points with largest y coordinates
        # finger = sorted(finger, key=lambda x: x[1])
        # fingers = finger[0:5]

        if center_of_mass is not None and len(estimated_fingertips_coords) > 0:
            # Calculate distance of each fingertip to the center mass
            finger_distances = []
            for i in range(0, len(estimated_fingertips_coords)):
                distance = np.sqrt(
                    np.power(estimated_fingertips_coords[i][0] - center_of_mass[0], 2) +
                    np.power(estimated_fingertips_coords[i][1] - center_of_mass[1], 2))
                finger_distances.append(distance)
            self._finger_distances = finger_distances
    else:
        self._detection_status = -2
        self._detected = False
        self._confidence = 0
        return
def test_roi_upscale(self):
    """
    Test that the roi upscales right
    """
    roi = Roi([0, 0, 640, 480])
    limit = Roi([0, 0, 640, 480])
    a = roi.upscaled(limit, 10)
    self.assertEqual(a, [0, 0, 640, 480])
    roi = Roi([0, 0, 320, 240])
    a = roi.upscaled(limit, 10)
    self.assertEqual(a, [0, 0, 330, 250])
    roi = Roi([10, 10, 320, 240])
    a = roi.upscaled(limit, 10)
    self.assertEqual(a, [5, 5, 330, 250])
    roi = Roi([40, 40, 50, 50])
    limit = Roi([30, 30, 60, 60])
    a = roi.upscaled(limit, 10)
    self.assertEqual(a, [35, 35, 60, 60])
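# The expected values above suggest that Roi.upscaled(limit, n) grows the roi by n pixels in
# total (n/2 on each side) and then clamps the origin and size to the limiting roi. A sketch
# of that behaviour (assumed from the assertions, not the actual implementation):
def upscaled_sketch(roi, limit, n):
    x, y, w, h = roi
    lx, ly, lw, lh = limit
    x, y, w, h = x - n // 2, y - n // 2, w + n, h + n  # grow by n/2 on every side
    x, y = max(x, lx), max(y, ly)                      # keep the origin inside the limit
    w, h = min(w, lw), min(h, lh)                      # never exceed the limit's size
    return [x, y, w, h]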
def elaborate(self, platform): # VGA constants pixel_f = self.timing.pixel_freq hsync_front_porch = self.timing.h_front_porch hsync_pulse_width = self.timing.h_sync_pulse hsync_back_porch = self.timing.h_back_porch vsync_front_porch = self.timing.v_front_porch vsync_pulse_width = self.timing.v_sync_pulse vsync_back_porch = self.timing.v_back_porch # Pins clk25 = platform.request("clk25") ov7670 = platform.request("ov7670") led = [platform.request("led", i) for i in range(8)] leds = Cat([i.o for i in led]) led8_2 = platform.request("led8_2") leds8_2 = Cat([led8_2.leds[i] for i in range(8)]) led8_3 = platform.request("led8_3") leds8_3 = Cat([led8_3.leds[i] for i in range(8)]) leds16 = Cat(leds8_3, leds8_2) btn1 = platform.request("button_fire", 0) btn2 = platform.request("button_fire", 1) up = platform.request("button_up", 0) down = platform.request("button_down", 0) pwr = platform.request("button_pwr", 0) left = platform.request("button_left", 0) right = platform.request("button_right", 0) sw = Cat([platform.request("switch", i) for i in range(4)]) uart = platform.request("uart") divisor = int(platform.default_clk_frequency // 460800) esp32 = platform.request("esp32_spi") csn = esp32.csn sclk = esp32.sclk copi = esp32.copi cipo = esp32.cipo m = Module() # Clock generator. m.domains.sync = cd_sync = ClockDomain("sync") m.domains.pixel = cd_pixel = ClockDomain("pixel") m.domains.shift = cd_shift = ClockDomain("shift") m.submodules.ecp5pll = pll = ECP5PLL() pll.register_clkin(clk25, platform.default_clk_frequency) pll.create_clkout(cd_sync, platform.default_clk_frequency) pll.create_clkout(cd_pixel, pixel_f) pll.create_clkout(cd_shift, pixel_f * 5.0 * (1.0 if self.ddr else 2.0)) # Add CamRead submodule camread = CamRead() m.submodules.camread = camread # Camera config cam_x_res = 640 cam_y_res = 480 camconfig = CamConfig() m.submodules.camconfig = camconfig # Connect the camera pins and config and read modules m.d.comb += [ ov7670.cam_RESET.eq(1), ov7670.cam_PWON.eq(0), ov7670.cam_XCLK.eq(clk25.i), ov7670.cam_SIOC.eq(camconfig.sioc), ov7670.cam_SIOD.eq(camconfig.siod), camconfig.start.eq(btn1), camread.p_data.eq(Cat([ov7670.cam_data[i] for i in range(8)])), camread.href.eq(ov7670.cam_HREF), camread.vsync.eq(ov7670.cam_VSYNC), camread.p_clock.eq(ov7670.cam_PCLK) ] # Create the uart m.submodules.serial = serial = AsyncSerial(divisor=divisor, pins=uart) # Frame buffer x_res = cam_x_res // 2 y_res = cam_y_res buffer = Memory(width=16, depth=x_res * y_res) m.submodules.r = r = buffer.read_port() m.submodules.w = w = buffer.write_port() # Button debouncers m.submodules.debup = debup = Debouncer() m.submodules.debdown = debdown = Debouncer() m.submodules.debosd = debosd = Debouncer() m.submodules.debsel = debsel = Debouncer() m.submodules.debsnap = debsnap = Debouncer() m.submodules.debhist = debhist = Debouncer() # Connect the buttons to debouncers m.d.comb += [ debup.btn.eq(up), debdown.btn.eq(down), debosd.btn.eq(pwr), debsel.btn.eq(right), debsnap.btn.eq(left), debhist.btn.eq(btn2) ] # Image processing configuration registers flip = Signal(2, reset=1) # Flip the image horizontally or vertically mono_en = Signal(reset=0) # Convert to monochrome invert = Signal(reset=0) # Invert monochrome image thresh_en = Signal(reset=0) # Apply threshold to monochrome image threshold = Signal(8, reset=0) # Threshold value border = Signal(reset=0) # Use OSD to show a border filt_en = Signal(reset=0) # Apply a color filter l = Rgb565(reset=(18, 12, 6)) # Image filter low values h = Rgb565(reset=(21, 22, 14)) # 
Image filter high values grid = Signal(reset=0) # Use OSD to show a grid hist_view = Signal(reset=1) # Switch to histogram view hist_chan = Signal(2, reset=0) # The histogram channel to calculate ccr = CC(reset=(0, 0, 18, 12, 16)) # Color control record sharpness = Signal( unsigned(4), reset=0 ) # Used to select image convolution kernel for blur/sharpness roi = Roi() # Region on interest frozen = Signal(reset=1) # Freeze/unfreeze video display sat_en = Signal() saturation = Signal(5, reset=16) # Control synchronization of camera with fifo sync_fifo = Signal(reset=0) # OSD control signals osd_val = Signal( 4, reset=0) # Account for spurious start-up button pushes osd_on = Signal(reset=1) osd_sel = Signal(reset=1) # Snapshot signals snap = Signal(reset=0) writing = Signal(reset=0) written = Signal(reset=0) byte = Signal(reset=0) w_addr = Signal(18) # Signals for calculating histogram hist_val = Signal(6) # Signals for displaying histogram hist_color = Signal(8) hbin = Signal(6, reset=0) bin_cnt = Signal(5, reset=0) old_x = Signal(10) # Frame buffer coordinates frame_x = Signal(10) frame_y = Signal(9) # VGA signals vga_r = Signal(8) vga_g = Signal(8) vga_b = Signal(8) vga_hsync = Signal() vga_vsync = Signal() vga_blank = Signal() # Pixel from camera pix = Rgb565() # Fifo stream m.submodules.fifo_stream = fs = FifoStream() # SPI memory for remote configuration m.submodules.spimem = spimem = SpiMem(addr_bits=32) # Color Control m.submodules.cc = cc = ColorControl() # Image convolution m.submodules.imc = imc = ImageConv() # Statistics m.submodules.stats = stats = Stats() # Histogram m.submodules.hist = hist = Hist() # Filter m.submodules.fil = fil = Filt() # Monochrome m.submodules.mon = mon = Mono() # Saturation m.submodules.sat = sat = Saturation() # Sync the fifo with the camera with m.If(~sync_fifo & (camread.col == cam_x_res - 1) & (camread.row == cam_y_res - 1)): m.d.sync += sync_fifo.eq(1) with m.If(btn1): m.d.sync += sync_fifo.eq(0) # Set histogram value to the data for the chosen channel with m.Switch(hist_chan): with m.Case(0): m.d.comb += hist_val.eq(cc.o.r) with m.Case(1): m.d.comb += hist_val.eq(cc.o.g) with m.Case(2): m.d.comb += hist_val.eq(cc.o.b) with m.Case(3): m.d.comb += hist_val.eq(mon.o_m) # Copy camera data to Rgb565 record m.d.comb += [ pix.r.eq(camread.pixel_data[11:]), pix.g.eq(camread.pixel_data[5:11]), pix.b.eq(camread.pixel_data[:5]) ] # Input image processing pipeline pipeline = [ [ fs, { "i": pix, # Fifo stream "i_valid": camread.pixel_valid & camread.col[0], "i_ready": cc.o_ready, "i_en": sync_fifo }, True ], [sat, { "i_en": sat_en, "i_saturation": saturation }, True], [cc, { "i_cc": ccr }, True], # Color control [ fil, { "i_en": filt_en, # Color filter "i_frame_done": fs.o_eof, "i_l": l, "i_h": h }, True ], [ mon, { "i_en": mono_en | invert | thresh_en, # Monochrome, invert and threshold "i_invert": invert, "i_thresh": thresh_en, "i_threshold": threshold }, True ], [ imc, { "i_ready": 1, # Image convolution "i_reset": ~fs.i_en, "i_sel": sharpness }, True ], [ stats, { "i": cc.o, # Statistics "i_valid": cc.o_valid, "i_avg_valid": (fs.o_x >= 32) & (fs.o_x < 288) & (fs.o_y >= 112) & (fs.o_y < 368), "i_frame_done": fs.o_eof, "i_x": fs.o_x, "i_y": fs.o_y, "i_roi": roi }, False ], [ hist, { "i_p": hist_val, # Histogram "i_valid": mon.o_valid, "i_clear": fs.o_eof, "i_x": fs.o_x, "i_y": fs.o_y, "i_roi": roi, "i_bin": hbin }, False ] ] def execute(pl): us = None # Upstream for p in pl: mod = p[0] d = p[1] st = p[2] # Stream or Sink if st and us is not None: m.d.comb += 
mod.i.eq(us.o) m.d.comb += mod.i_valid.eq(us.o_valid) m.d.comb += us.i_ready.eq(mod.o_ready) if st: us = mod for k in d: m.d.comb += mod.__dict__[k].eq(d[k]) execute(pipeline) # Take a snapshot, freeze the camera, and write the framebuffer to the uart # Note that this suspends video output with m.If(debsnap.btn_down | (spimem.wr & (spimem.addr == 22))): with m.If(frozen): m.d.sync += frozen.eq(0) with m.Else(): m.d.sync += [ snap.eq(1), frozen.eq(0), w_addr.eq(0), written.eq(0), byte.eq(0) ] # Wait to end of frame after requesting snapshot, before start of writing to uart with m.If(imc.o_eof & snap): m.d.sync += [frozen.eq(1), snap.eq(0)] with m.If(~written): m.d.sync += writing.eq(1) # Connect the uart m.d.comb += [ serial.tx.data.eq(Mux(byte, r.data[8:], r.data[:8])), serial.tx.ack.eq(writing) ] # Write to the uart from frame buffer (affects video output) with m.If(writing): with m.If(w_addr == x_res * y_res): m.d.sync += [writing.eq(0), written.eq(1)] with m.Elif(serial.tx.ack & serial.tx.rdy): m.d.sync += byte.eq(~byte) with m.If(byte): m.d.sync += w_addr.eq(w_addr + 1) # Connect spimem m.d.comb += [ spimem.csn.eq(~csn), spimem.sclk.eq(sclk), spimem.copi.eq(copi), cipo.eq(spimem.cipo), ] # Writable configuration registers spi_wr_vals = Array([ ccr.brightness, ccr.redness, ccr.greenness, ccr.blueness, l.r, h.r, l.g, h.g, l.b, h.b, sharpness, filt_en, border, mono_en, invert, grid, hist_view, roi.x[1:], roi.y[1:], roi.w[1:], roi.h[1:], roi.en, None, None, None, threshold, thresh_en, hist_chan, flip, None, None, None, None, None, None, None, None, None, frozen, None, None, sat_en, saturation, ccr.offset ]) with m.If(spimem.wr): with m.Switch(spimem.addr): for i in range(len(spi_wr_vals)): if spi_wr_vals[i] is not None: with m.Case(i): m.d.sync += spi_wr_vals[i].eq(spimem.dout) # Readable configuration registers spi_rd_vals = Array([ ccr.brightness, ccr.redness, ccr.greenness, ccr.blueness, l.r, h.r, l.g, h.g, l.b, h.b, sharpness, filt_en, border, mono_en, invert, grid, hist_view, roi.x[1:], roi.y[1:], roi.w[1:], roi.h[1:], roi.en, fil.o_nz[16:], fil.o_nz[8:16], fil.o_nz[:8], threshold, thresh_en, hist_chan, flip, stats.o_min.r, stats.o_min.g, stats.o_min.b, stats.o_max.r, stats.o_max.g, stats.o_max.b, stats.o_avg.r, stats.o_avg.g, stats.o_avg.b, frozen, writing, written, sat_en, saturation, ccr.offset ]) with m.If(spimem.rd): with m.Switch(spimem.addr): for i in range(len(spi_rd_vals)): with m.Case(i): m.d.sync += spimem.din.eq(spi_rd_vals[i]) # Add VGA generator m.submodules.vga = vga = VGA( resolution_x=self.timing.x, hsync_front_porch=hsync_front_porch, hsync_pulse=hsync_pulse_width, hsync_back_porch=hsync_back_porch, resolution_y=self.timing.y, vsync_front_porch=vsync_front_porch, vsync_pulse=vsync_pulse_width, vsync_back_porch=vsync_back_porch, bits_x=16, # Play around with the sizes because sometimes bits_y=16 # a smaller/larger value will make it pass timing. 
) # Fetch histogram for display m.d.sync += old_x.eq(vga.o_beam_x) with m.If(vga.o_beam_x == 0): m.d.sync += [hbin.eq(0), bin_cnt.eq(0)] with m.Elif(vga.o_beam_x != old_x): m.d.sync += bin_cnt.eq(bin_cnt + 1) with m.If(bin_cnt == 19): m.d.sync += [bin_cnt.eq(0), hbin.eq(hbin + 1)] # Switch between camera and histogram view with m.If(debhist.btn_down): m.d.sync += hist_view.eq(~hist_view) # Connect frame buffer, with optional x and y flip m.d.comb += [ frame_x.eq( Mux(flip[0], x_res - 1 - vga.o_beam_x[1:], vga.o_beam_x[1:])), frame_y.eq(Mux(flip[1], y_res - 1 - vga.o_beam_y, vga.o_beam_y)), w.en.eq(imc.o_valid & ~frozen), w.addr.eq(imc.o_y * x_res + imc.o_x), w.data.eq(imc.o.as_data()), r.addr.eq(Mux(writing, w_addr, frame_y * x_res + frame_x)) ] # Apply the On-Screen Display (OSD) m.submodules.osd = osd = OSD() m.d.comb += [ osd.x.eq(vga.o_beam_x), osd.y.eq(vga.o_beam_y), hist_color.eq(Mux((479 - osd.y) < hist.o_val[8:], 0xff, 0x00)), osd.i_r.eq( Mux(hist_view, Mux((hist_chan == 0) | (hist_chan == 3), hist_color, 0), Cat(Const(0, unsigned(3)), r.data[11:16]))), osd.i_g.eq( Mux(hist_view, Mux((hist_chan == 1) | (hist_chan == 3), hist_color, 0), Cat(Const(0, unsigned(2)), r.data[5:11]))), osd.i_b.eq( Mux(hist_view, Mux((hist_chan == 2) | (hist_chan == 3), hist_color, 0), Cat(Const(0, unsigned(3)), r.data[0:5]))), osd.on.eq(osd_on), osd.osd_val.eq(osd_val), osd.sel.eq(osd_sel), osd.grid.eq(grid), osd.border.eq(border), osd.roi.eq(roi.en & ~hist_view), osd.roi_x.eq(roi.x), osd.roi_y.eq(roi.y), osd.roi_w.eq(roi.w), osd.roi_h.eq(roi.h) ] # OSD control dummy = Signal() osd_vals = Array([ ccr.offset, ccr.brightness, ccr.redness, ccr.greenness, ccr.blueness, sharpness, sat_en, saturation, mono_en, invert, thresh_en, threshold, hist_chan, Cat(border, grid), flip, filt_en ]) with m.If(debosd.btn_down): m.d.sync += osd_on.eq(~osd_on) with m.If(osd_on): with m.If(debsel.btn_down): m.d.sync += osd_sel.eq(~osd_sel) with m.If(debup.btn_down): with m.If(~osd_sel): m.d.sync += osd_val.eq(osd_val - 1) with m.Else(): with m.Switch(osd_val): for i in range(len(osd_vals)): with m.Case(i): if (len(osd_vals[i]) == 1): m.d.sync += osd_vals[i].eq(1) else: m.d.sync += osd_vals[i].eq(osd_vals[i] + 1) with m.If(debdown.btn_down): with m.If(~osd_sel): m.d.sync += osd_val.eq(osd_val + 1) with m.Else(): with m.Switch(osd_val): for i in range(len(osd_vals)): with m.Case(i): if (len(osd_vals[i]) == 1): m.d.sync += osd_vals[i].eq(0) else: m.d.sync += osd_vals[i].eq(osd_vals[i] - 1) # Show configuration values on leds with m.Switch(osd_val): for i in range(len(osd_vals)): with m.Case(i): m.d.comb += leds.eq(osd_vals[i]) # Generate VGA signals m.d.comb += [ vga.i_clk_en.eq(1), vga.i_test_picture.eq(0), vga.i_r.eq(osd.o_r), vga.i_g.eq(osd.o_g), vga.i_b.eq(osd.o_b), vga_r.eq(vga.o_vga_r), vga_g.eq(vga.o_vga_g), vga_b.eq(vga.o_vga_b), vga_hsync.eq(vga.o_vga_hsync), vga_vsync.eq(vga.o_vga_vsync), vga_blank.eq(vga.o_vga_blank), ] # VGA to digital video converter. tmds = [Signal(2) for i in range(4)] m.submodules.vga2dvid = vga2dvid = VGA2DVID( ddr=self.ddr, shift_clock_synchronizer=False) m.d.comb += [ vga2dvid.i_red.eq(vga_r), vga2dvid.i_green.eq(vga_g), vga2dvid.i_blue.eq(vga_b), vga2dvid.i_hsync.eq(vga_hsync), vga2dvid.i_vsync.eq(vga_vsync), vga2dvid.i_blank.eq(vga_blank), tmds[3].eq(vga2dvid.o_clk), tmds[2].eq(vga2dvid.o_red), tmds[1].eq(vga2dvid.o_green), tmds[0].eq(vga2dvid.o_blue), ] # GPDI pins if (self.ddr): # Vendor specific DDR modules. # Convert SDR 2-bit input to DDR clocked 1-bit output (single-ended) # onboard GPDI. 
m.submodules.ddr0_clock = Instance("ODDRX1F", i_SCLK=ClockSignal("shift"), i_RST=0b0, i_D0=tmds[3][0], i_D1=tmds[3][1], o_Q=self.o_gpdi_dp[3]) m.submodules.ddr0_red = Instance("ODDRX1F", i_SCLK=ClockSignal("shift"), i_RST=0b0, i_D0=tmds[2][0], i_D1=tmds[2][1], o_Q=self.o_gpdi_dp[2]) m.submodules.ddr0_green = Instance("ODDRX1F", i_SCLK=ClockSignal("shift"), i_RST=0b0, i_D0=tmds[1][0], i_D1=tmds[1][1], o_Q=self.o_gpdi_dp[1]) m.submodules.ddr0_blue = Instance("ODDRX1F", i_SCLK=ClockSignal("shift"), i_RST=0b0, i_D0=tmds[0][0], i_D1=tmds[0][1], o_Q=self.o_gpdi_dp[0]) else: m.d.comb += [ self.o_gpdi_dp[3].eq(tmds[3][0]), self.o_gpdi_dp[2].eq(tmds[2][0]), self.o_gpdi_dp[1].eq(tmds[1][0]), self.o_gpdi_dp[0].eq(tmds[0][0]), ] return m
class Attacker(object): def __init__(self, config, attack_loader): # Data loader self.attack_loader = attack_loader # Models self.model_net = None self.optimizer = None self.img_ch = config.img_ch self.output_ch = config.output_ch self.objective = config.objective self.criterion = torch.nn.CrossEntropyLoss() self.augmentation_prob = config.augmentation_prob self.config =config self.roi=Roi() # Hyper-parameters self.lr = config.lr self.beta1 = config.beta1 self.beta2 = config.beta2 print("@@@@@@@@@@@@@@@@@@@@@@@ LR B1 & B2 for Adam ------> ",self.lr,self.beta1,self.beta2) # Training settings self.num_epochs = config.num_epochs self.num_epochs_decay = config.num_epochs_decay self.batch_size = config.batch_size # Step size self.log_step = config.log_step #self.val_step = config.val_step self.val_step = 1000000 # Path self.model_path = config.model_path self.result_path = config.result_path self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') self.model_type = config.model_type self.objective = config.objective self.build_model() def build_model(self): """Build generator and discriminator.""" if self.model_type =='LivDet2015': self.model_net = LivDet2015(in_ch=self.img_ch, num_classes=2) print('Building LivDet2015 model again for attacking') self.optimizer = optim.SGD(list(self.model_net.parameters()), lr=0.000001, momentum=0.9) self.model_net.to(self.device) # self.print_network(self.model_net, self.model_type) def print_network(self, model, name): """Print out the network information.""" num_params = 0 for p in model.parameters(): num_params += p.numel() print(model) print(name) print("The number of parameters: {}".format(num_params)) def to_data(self, x): """Convert variable to tensor.""" if torch.cuda.is_available(): x = x.cpu() return x.data def reset_grad(self): """Zero the gradient buffers.""" self.model_net.zero_grad() self.optimizer.zero_grad() def perturb_image(self,xs, img): if xs.ndim < 2: xs = np.array([xs]) batch = len(xs) imgs = img.repeat(batch, 1, 1, 1) xs = xs.astype(int) count = 0 for x in xs: if self.img_ch==3: pixels = np.split(x, len(x)/5) elif self.img_ch==1: pixels = np.split(x, len(x)/3) for pixel in pixels: if self.img_ch==3: x_pos, y_pos, r, g, b = pixel imgs[count, 0, x_pos, y_pos] = (r/255.0-0.4914)/0.2023 imgs[count, 1, x_pos, y_pos] = (g/255.0-0.4822)/0.1994 imgs[count, 2, x_pos, y_pos] = (b/255.0-0.4465)/0.2010 elif self.img_ch==1: x_pos, y_pos, grey = pixel imgs[count, 0, x_pos, y_pos] = (grey-0.1307)/0.3081 count += 1 return imgs def predict_classes(self,xs, img, target_calss, net, minimize=True): imgs_perturbed = self.perturb_image(xs, img.clone()) #input_image = Variable(imgs_perturbed, volatile=True).cuda() input_image = Variable(imgs_perturbed, volatile=True).to(self.device) predictions = F.softmax(net(input_image)).data.cpu().numpy()[:, target_calss] return predictions if minimize else 1 - predictions def attack_success(self,x, img, target_calss, net, targeted_attack=False, verbose=False): attack_image = self.perturb_image(x, img.clone()) #input_image = Variable(attack_image, volatile=True).cuda() input_image = Variable(attack_image, volatile=True).to(self.device) confidence = F.softmax(net(input_image)).data.cpu().numpy()[0] predicted_class = np.argmax(confidence) if (verbose): print ("Confidence: %.4f"%confidence[target_calss]) if (targeted_attack and predicted_class == target_calss) or (not targeted_attack and predicted_class != target_calss): return True def attack(self,img, label, net, target=None, pixels=1, maxiter=75, 
popsize=400, verbose=False): # img: 1*3*W*H tensor # label: a number targeted_attack = target is not None target_calss = target if targeted_attack else label image_size = 227 img_numpy = img.numpy()[0,:,:,:] img_numpy = np.reshape(img_numpy, (image_size, image_size, 3)) print(type(img_numpy),"<-------------- type ",img_numpy.shape) objs = self.roi.get_roi(img_numpy,w=8, threshold=.5) print(objs[0][1].start, objs[0][0].start, objs[0][1].stop, objs[0][0].stop) if self.img_ch==3: #bounds = [(0,image_size), (0,image_size), (0,255), (0,255), (0,255)] * pixels bounds = [(objs[0][1].start,objs[0][1].stop), (objs[0][0].start,objs[0][0].stop), (0,255), (0,255), (0,255)] * pixels elif self.img_ch==1: #bounds = [(0,image_size), (0,image_size), (0,1)] * pixels bounds = [(objs[0][1].start,objs[0][1].stop), (objs[0][0].start,objs[0][0].stop), (0,1)] * pixels popmul = max(1, popsize/len(bounds)) predict_fn = lambda xs: self.predict_classes( xs, img, target_calss, net, target is None) callback_fn = lambda x, convergence: self.attack_success( x, img, target_calss, net, targeted_attack, verbose) # print("type.popmul", type(popmul)) inits = np.zeros([int(popmul*len(bounds)), len(bounds)]) for init in inits: for i in range(pixels): if self.img_ch == 3: init[i*5+0] = np.random.random()*image_size init[i*5+1] = np.random.random()*image_size init[i*5+2] = np.random.normal(128,127) init[i*5+3] = np.random.normal(128,127) init[i*5+4] = np.random.normal(128,127) elif self.img_ch==1: init[i*3+0] = np.random.random()*image_size init[i*3+1] = np.random.random()*image_size init[i*3+2] = np.random.normal(-1,1) attack_result = differential_evolution(predict_fn, bounds, maxiter=maxiter, popsize=popmul, recombination=1, atol=-1, callback=callback_fn, polish=False, init=inits) attack_image = self.perturb_image(attack_result.x, img) #attack_var = Variable(attack_image, volatile=True).cuda() attack_var = Variable(attack_image, volatile=True).to(self.device) predicted_probs = F.softmax(net(attack_var)).data.cpu().numpy()[0] predicted_class = np.argmax(predicted_probs) if (not targeted_attack and predicted_class != label) or (targeted_attack and predicted_class == target_calss): return 1, attack_result.x.astype(int),attack_var return 0, [None], attack_var def attack_all(self,net, loader, pixels=1, targeted=False, maxiter=75, popsize=400, verbose=False): total_fake_fp = 0.0 success = 0 success_rate = 0 for batch_idx, (image_input, target,imagename) in enumerate(loader): if type(imagename) is tuple: imagename = imagename[0] #print("image_input.shape", image_input.shape,target) #img_var = Variable(image_input, volatile=True).cuda() img_var = Variable(image_input, volatile=True).to(self.device) #prior_probs = F.softmax(net(img_var)) prior_probs = F.softmax(F.sigmoid(net(img_var))) #print(prior_probs) _, indices = torch.max(prior_probs, 1) if target[0] ==1: #If the image is live, we dont need to perform attack on it continue if target[0] == 0 and target[0] != indices.data.cpu()[0]: #Actual label is fake but prediction already live, attack not possible continue total_fake_fp += 1 target = target.numpy() targets = [None] if not targeted else range(10) print("targeted mode", targeted) for target_calss in targets: if (targeted): if (target_calss == target[0]): continue print("Running attack for target",target[0],"and pred",indices.data.cpu()[0]) flag, x, attack_var = self.attack(image_input, target[0], net, target_calss, pixels=pixels, maxiter=maxiter, popsize=popsize, verbose=verbose) print("flag==>", flag) success += flag if flag == 
1: print("1 positive attack recorded") save_image(img_var,'./dataset/adversarial_data/FGSM/' + imagename+'_purturbed.png') if (targeted): success_rate = float(success)/(9*total_fake_fp) else: success_rate = float(success)/total_fake_fp return success_rate def FGSM_attack_all(self,net, loader, maxiter=400): total_fake_fp = 0.0 success = 0.0 success_rate=0.0 learning_rate = 1e-3 epsilon = 0.01 for batch_idx, (image_input, target, imagename) in enumerate(loader): image_input = image_input.to(self.device) target = target.to(self.device) if type(imagename) is tuple: imagename = imagename[0] net.eval() # To switch off Dropouts and batchnorm #img_var = Variable(image_input,requires_grad=True).cuda() img_var = Variable(image_input,requires_grad=True).to(self.device) #prior_probs = F.softmax(net(img_var)) prior_probs = F.softmax(F.sigmoid(net(img_var))) _, indices = torch.max(prior_probs, 1) if target[0] ==1: #If the image is live, we dont need to perform attack on it continue if target[0] == 0 and target[0] != indices.data.cpu()[0]: #Actual label is fake but prediction already live, attack not possible continue total_fake_fp += 1.0 for i in range(maxiter): pred_output = net(img_var) prior_probs = F.softmax(F.sigmoid(pred_output)) _,indices = torch.max(prior_probs, 1) if target[0] != indices.data.cpu()[0]: #If after perturbations, misclassification occurs, its a +ve attack print("1 positive attack recorded", indices.data.cpu()[0] ) success +=1.0 save_image(img_var,'./dataset/adversarial_data/FGSM/'+imagename+'_purturbed.png') break loss = self.criterion(pred_output,target) loss.backward() img_var_grad = torch.sign(img_var.grad.data) img_var = img_var.data + epsilon * img_var_grad img_var.requires_grad = True success_rate = success/total_fake_fp print(" Total correctly recognized fake images are ---> ",total_fake_fp) print(" Successful attacks rate ----->", success_rate) return success_rate def DeepFool_attack_all(self,net, loader, maxiter=400): total_fake_fp = 0.0 success = 0.0 success_rate=0.0 learning_rate = 1e-3 epsilon = 0.01 for batch_idx, (image_input, target,imagename) in enumerate(loader): if type(imagename) is tuple: imagename = imagename[0] image_input = image_input.to(self.device) target = target.to(self.device) net.eval() # To switch off Dropouts and batchnorm #prior_probs = F.softmax(net(img_var)) prior_probs = F.softmax(F.sigmoid(net(image_input))) _, indices = torch.max(prior_probs, 1) if target[0] ==1: #If the image is live, we dont need to perform attack on it continue if target[0] == 0 and target[0] != indices.data.cpu()[0]: #Actual label is fake but prediction already live, attack not possible continue total_fake_fp += 1.0 r, loop_i, label_orig, label_pert, pert_image = deepfool(image_input[0], net,max_iter=maxiter) print("Original label = ", np.int(label_orig)) print("Perturbed label = ", np.int(label_pert)) if np.int(label_orig) == 0 and np.int(label_pert)== 1: print("1 positive attack recorded") save_image(pert_image,'./dataset/adversarial_data/DeepFool/'+imagename+'_purturbed.png') success+=1.0 success_rate = success/total_fake_fp print(" Total correctly recognized fake images are ---> ",total_fake_fp) print(" Successful attacks rate ----->", success_rate) return success_rate def train(self): model_net_path = './models/epoch-9-LivDet2015-200-0.0010-70-0.0000.pkl' if os.path.isfile(model_net_path): # Load the pretrained Encoder self.model_net.load_state_dict(torch.load(model_net_path)) print('%s is Successfully Loaded from %s'%(self.model_type,model_net_path)) cudnn.benchmark 
= True print("-------> starting Attack <------") if self.config.attack_type == 'DE': results = self.attack_all(self.model_net, self.attack_loader, pixels=self.config.pixels, targeted=self.config.targeted, maxiter=self.config.maxiter, popsize=self.config.popsize, verbose=False) elif self.config.attack_type =='FGSM': results =self.FGSM_attack_all(self.model_net,self.attack_loader,maxiter=self.config.maxiter) elif self.config.attack_type == 'DeepFool': results = self.DeepFool_attack_all(self.model_net, self.attack_loader, maxiter = self.config.maxiter) print(results) print ("Final success rate: ",results) else: print('Cannot find trained model, Cannot attack this network before training')
def crowd_detection(threshold, layout):
    # Class definition
    counting = Counting()
    display = Display()
    frame_manipulation = FrameManipulation()
    report = Report()
    roi = Roi()

    # Var definition
    create_roi = False
    frame_id = 0
    interval = 30
    is_box_active = False
    pts = []
    report_duration = 0
    threshold_confident = 0.05
    using_roi = False
    video_cap = cv2.VideoCapture(video_path)

    # Set Yolo
    net = cv2.dnn.readNet(yolo_weight_path, yolo_cfg_path)

    # Set CUDA usage
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    # Yolo Definition
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    # Set window
    window = sg.Window("Kontrol", layout, size=(300, 380), resizable=False, finalize=True)

    # Read until video is completed
    while video_cap.isOpened():
        event, values = window.read(timeout=20)
        ret, frame = video_cap.read()
        frame_id += 1

        if ret:
            # Var definition
            boxes = []
            confidences = []
            mask = np.zeros((frame.shape[0], frame.shape[1], 3), np.uint8)
            mask2 = np.zeros((frame.shape[0], frame.shape[1], 3), np.uint8)

            if using_roi:
                points = np.array(pts, np.int32)
                points = points.reshape((-1, 1, 2))
                mask2 = cv2.fillPoly(mask.copy(), [points], (255, 255, 255))
                frame_roi = cv2.bitwise_and(mask2, frame)
                frame_to_detect = frame_roi
            else:
                mask2[:] = (255, 255, 255)
                frame_to_detect = frame

            # Detection object
            people = Detection.detection_object(net, output_layers, boxes, confidences,
                                                threshold_confident, frame_to_detect)

            # Draw object detected on mask
            frame_manipulation.draw_object_detected(boxes, people, mask)
            if using_roi:
                mask = cv2.bitwise_and(mask2, mask)

            # Crowd counting
            percentage = counting.occupancy_counting(mask, mask2)

            # Person counting
            person_detected = counting.people_counting(people)

            # Draw bounding box
            if is_box_active:
                frame_manipulation.draw_bounding_box(boxes, confidences, people, frame)

            # Define ROI
            if create_roi:
                using_roi = roi.define_roi(window, pts, frame, percentage,
                                           person_detected, threshold)
                create_roi = not create_roi

            # Show FPS
            display.show_fps(frame_id, frame)

            # Display frame
            display.display_frame(using_roi, pts, frame)

            # Create report
            report_duration = report.create_report(report_duration, interval, image_path,
                                                   log_path, percentage, threshold,
                                                   person_detected, frame)

            # Window update
            window["-THRESHOLD1-"].update(value=threshold)
            window["-CROWD1-"].update(value=person_detected)
            window["-OCCUPY1-"].update(value=percentage)
        else:
            break

        # Toggle bounding box display
        if event == '-BOUND-':
            is_box_active = not is_box_active

        # Create ROI (hide button)
        if event == '-CREATE-':
            create_roi = not create_roi

        if event == '-EXIT-':
            break

    video_cap.release()
    cv2.destroyAllWindows()
class Hand(object): """ This contains all the usefull information for a detected hand. """ def __init__(self, detector): """ Hand class attributes values. """ self._detector = detector self._id = None self._fingertips = [] self._intertips = [] self._center_of_mass = None self._finger_distances = [] self._average_defect_distance = [] self._contour = None self._consecutive_tracking_fails = 0 self._consecutive_detection_fails = 0 self._frame_count = 0 self._color = get_random_color() self._confidence = 0 self._tracked = False self._detected = False self._detection_status = 0 self._position_history = [] # The region of the image where the hand is expected to be located when initialized or lost self._initial_roi = Roi() # The region where the hand have been detected the last time self._detection_roi = Roi() # The region where the hand was tracked the last time self._tracking_roi = Roi() # Region extended from tracking_roi to a maximum of initial_roi to look for the hand self._extended_roi = Roi() self._mask_mode = MASKMODES.COLOR self._debug = True self._depth_threshold = -1 self._last_frame = None self._ever_detected = False ##################################################################### ########## Properties and setters ########## ##################################################################### @property def initial_roi(self): return self._initial_roi @initial_roi.setter def initial_roi(self, value): # assert all(isinstance(n, (int, float)) for n in value) or isinstance(value, Roi), "initial_roi must be of the type Roi" if isinstance(value, Roi): self._initial_roi = value else: self._initial_roi = Roi(value) self.extended_roi = self._initial_roi @property def tracking_roi(self): return self._tracking_roi @tracking_roi.setter def tracking_roi(self, value): assert all(isinstance(n, (int, float)) for n in value) or isinstance(value, Roi), "tracking_roi must be of the type Roi" if isinstance(value, Roi): self._tracking_roi = value else: self._tracking_roi = Roi(value) # Tracking_roi must be limited to the initial_roi self._tracking_roi.limit_to_roi(self.initial_roi) @property def detection_roi(self): return self._detection_roi @detection_roi.setter def detection_roi(self, value): assert all(isinstance(n, (int, float)) for n in value) or isinstance(value, Roi), "detection_roi must be of the type Roi" if isinstance(value, Roi): self._detection_roi = value else: self._detection_roi = Roi(value) # Detection_roi must be limited to the initial_roi self._detection_roi.limit_to_roi(self.initial_roi) @property def extended_roi(self): return self._extended_roi @extended_roi.setter def extended_roi(self, value): assert all(isinstance(n, (int, float)) for n in value) or isinstance(value, Roi), "extended_roi must be of the type Roi" if isinstance(value, Roi): self._extended_roi = value else: self._extended_roi = Roi(value) # Extended_roi must be limited to the initial_roi self._extended_roi.limit_to_roi(self.initial_roi) @property def depth_threshold(self): return self._depth_threshold @depth_threshold.setter def depth_threshold(self, value): self._depth_threshold = value @property def confidence(self): return self._confidence @confidence.setter def confidence(self, value): self._confidence = value @property def valid(self): return (self.detected or self.tracked or self._confidence > 0) @property def detected(self): return self._detected @detected.setter def detected(self, value): self._detected = value @property def tracked(self): return self._tracked @tracked.setter def tracked(self, value): 
self._tracked = value ##################################################################### ########## Probably deprecated methods # TODO: check ########## ##################################################################### #TODO: Check if we need a deep copy of the data. # def update_attributes_from_detected(self, other_hand): # """ # update current hand with the values of other hand # TODO: need to be checked. # :param other_hand: the hand where the values are going to be copied # :return: None # """ # self._fingertips = other_hand._fingertips # self._intertips = other_hand._intertips # self._center_of_mass = other_hand._center_of_mass # self._finger_distances = other_hand._finger_distances # self._average_defect_distance = other_hand._average_defect_distance # self._contour = other_hand._contour # self.detection_roi = other_hand._detection_roi # self._detected = True # def update_truth_value_by_time(self): # """ # Update the truth value of the hand based on the time elapsed between two calls # and if the hand is detected and tracked # :return: None # """ # if self.last_time_update is not None: # elapsed_time = datetime.now() - self.last_time_update # elapsed_miliseconds = int(elapsed_time.total_seconds() * 1000) # # Calculate how much we would substract if the hand is undetected # truth_subtraction = elapsed_miliseconds * MAX_TRUTH_VALUE / MAX_UNDETECTED_SECONDS * 1000 # # Calculate how much we should increment if the hand has been detected # detection_adition = DETECTION_TRUTH_FACTOR if self._detected is True else 0 # # Calculate how much we should increment if the is tracked # tracking_adition = TRACKING_TRUTH_FACTOR if self._tracked is True else 0 # # update of the truth value # self._confidence = self._confidence - truth_subtraction + detection_adition + tracking_adition # self.last_time_update = datetime.now() # Deprecated: using update_truth_value_by_frame2 # def update_truth_value_by_frame(self): # """ # Update the truth value of the hand based on the frames elapsed between two calls # and if the hand is detected and tracked # :return: None # """ # one_frame_truth_subtraction = MAX_TRUTH_VALUE / MAX_UNDETECTED_FRAMES # detection_adition = 0 # if self._detected: # detection_adition = DETECTION_TRUTH_FACTOR * one_frame_truth_subtraction # else: # self._consecutive_detection_fails += 1 # detection_adition = -1 * UNDETECTION_TRUTH_FACTOR * one_frame_truth_subtraction # tracking_adition = 0 # if self._tracked: # tracking_adition = TRACKING_TRUTH_FACTOR * one_frame_truth_subtraction # else: # self._consecutive_tracking_fails += 1 # tracking_adition = -1 * UNTRACKING_TRUTH_FACTOR * one_frame_truth_subtraction # new_truth_value = self._confidence - one_frame_truth_subtraction + detection_adition + tracking_adition # if new_truth_value <= MAX_TRUTH_VALUE: # self._confidence = new_truth_value # else: # self._confidence = MAX_TRUTH_VALUE # self._frame_count += 1 # def update_truth_value_by_frame2(self): # substraction = 0 # one_frame_truth_subtraction = MAX_TRUTH_VALUE / MAX_UNDETECTED_FRAMES # if not self._detected: # self._consecutive_detection_fails += 1 # if not self._tracked: # self._consecutive_tracking_fails += 1 # if not self._detected and not self._tracked: # substraction = -1 * UNDETECTION_TRUTH_FACTOR * UNTRACKING_TRUTH_FACTOR * one_frame_truth_subtraction # else: # if self._tracked: # substraction = substraction + UNTRACKING_TRUTH_FACTOR * one_frame_truth_subtraction # if self._detected: # substraction = substraction + UNDETECTION_TRUTH_FACTOR * one_frame_truth_subtraction # 
new_truth_value = self._confidence + substraction # if new_truth_value <= 100: # self._confidence = new_truth_value # else: # self._confidence = 100 # self._frame_count += 1 # def copy_main_attributes(self): # """ # Return a new hand with the main attributes of this copied into it # :return: New Hand with the main attributes copied into it # """ # updated_hand = Hand() # updated_hand._id = self._id # updated_hand._fingertips = [] # updated_hand._intertips = [] # updated_hand._center_of_mass = None # updated_hand._finger_distances = [] # updated_hand._average_defect_distance = [] # updated_hand._contour = None # updated_hand.detection_roi = self.detection_roi # updated_hand._consecutive_tracking_fails = self._consecutive_tracking_fails # updated_hand._position_history = self._position_history # updated_hand._color = self._color # return updated_hand ##################################################################### ########## Currently used methods ########## ##################################################################### def create_contours_and_mask(self, frame, roi=None): # Create a binary image where white will be skin colors and rest is black hands_mask = self.create_hand_mask(frame) if hands_mask is None: return ([], []) if roi is not None: x, y, w, h = roi else: x, y, w, h = self.initial_roi roied_hands_mask = roi.apply_to_frame_as_mask(hands_mask) if self._debug: # cv2.imshow("DEBUG: HandDetection_lib: create_contours_and_mask (Frame Mask)", hands_mask) # to_show = cv2.resize(hands_mask, None, fx=.3, fy=.3, interpolation=cv2.INTER_CUBIC) to_show = roied_hands_mask.copy() # cv2.putText(to_show, (str(w)), (x + w, y), FONT, 0.3, [255, 255, 255], 1) # cv2.putText(to_show, (str(h)), (x + w, y + h), FONT, 0.3, [100, 255, 255], 1) # cv2.putText(to_show, (str(w * h)), (x + w / 2, y + h / 2), FONT, 0.3, [100, 100, 255], 1) # cv2.putText(to_show, (str(x)+", "+str(y)), (x-10, y-10), FONT, 0.3, [255, 255, 255], 1) to_show = roi.draw_on_frame(to_show) # cv2.imshow("DEBUG: HandDetection_lib: create_contours_and_mask (current_roi_mask)", roi.extract_from_frame(frame)) # cv2.imshow("DEBUG: HandDetection_lib: create_contours_and_mask (ROIed Mask)", to_show) ret, thresh = cv2.threshold(roied_hands_mask, 127, 255, 0) # Find contours of the filtered frame contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return (contours, hands_mask) def create_hand_mask(self, image, mode=None): if mode is None: mode = self._mask_mode # print "create_hands_mask %s" % mode mask = None if mode == MASKMODES.COLOR: mask = get_color_mask(image) elif mode == MASKMODES.MOG2: mask = self._detector.get_MOG2_mask(image) elif mode == MASKMODES.DIFF: mask = self._detector.get_simple_diff_mask2(image) elif mode == MASKMODES.MIXED: diff_mask = self._detector.get_simple_diff_mask(image) color_mask = get_color_mask(image) color_mask = clean_mask_noise(color_mask) if diff_mask is not None and color_mask is not None: mask = cv2.bitwise_and(diff_mask, color_mask) # if self._debug: # cv2.imshow("DEBUG: HandDetection_lib: diff_mask", diff_mask) # cv2.imshow("DEBUG: HandDetection_lib: color_mask", color_mask) elif mode == MASKMODES.MOVEMENT_BUFFER: # Absolutly unusefull mask = self._detector.get_movement_buffer_mask(image) elif mode == MASKMODES.DEPTH: if self._debug: print("Mode depth") assert self.depth_threshold != -1, "Depth threshold must be set with set_depth_mask method. 
Use this method only with RGBD cameras" assert len(image.shape) == 2 or image.shape[2] == 1, "Depth image should have only one channel and it have %d" % image.shape[2] #TODO: ENV_DEPENDENCE: the second value depends on the distance from the camera to the maximum depth where it can be found in a scale of 0-255 mask = image mask[mask>self.depth_threshold]= 0 mask = self.depth_mask_to_image(mask) # Kernel matrices for morphological transformation kernel_square = np.ones((5, 5), np.uint8) kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)) # cv2.imwrite("/home/robolab/robocomp/components/robocomp-robolab/components/handDetection/src/images/"+str(datetime.now().strftime("%Y%m%d%H%M%S"))+".png", mask) # # dilation = cv2.dilate(mask, kernel_ellipse, iterations=1) # erosion = cv2.erode(dilation, kernel_square, iterations=1) mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_square) mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_square) # dilation2 = cv2.dilate(erosion, kernel_ellipse, iterations=1) # filtered = cv2.medianBlur(dilation2, 5) # kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8)) # dilation2 = cv2.dilate(filtered, kernel_ellipse, iterations=1) # kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) # dilation3 = cv2.dilate(filtered, kernel_ellipse, iterations=1) mask = cv2.medianBlur(mask, 3) # _, mask = cv2.threshold(mask, 100, 255, cv2.THRESH_BINARY) return mask def get_hand_bounding_rect_from_fingers(self, hand_contour, fingers_contour): (x, y), radius = cv2.minEnclosingCircle(fingers_contour) center = (int(x), int(y)) radius = int(radius) + 10 new_hand_contour = extract_contour_inside_circle(hand_contour, (center, radius)) hand_bounding_rect = cv2.boundingRect(new_hand_contour) return hand_bounding_rect, ((int(x), int(y)), radius), new_hand_contour # def get_hand_bounding_rect_from_rect(self, hand_contour, bounding_rect): # hand_contour = extract_contour_inside_rect(hand_contour, bounding_rect) # hand_bounding_rect = cv2.boundingRect(hand_contour) # return hand_bounding_rect, hand_contour # def get_hand_bounding_rect_from_center_of_mass(self, hand_contour, center_of_mass, average_distance): # (x, y) = center_of_mass # radius = average_distance # center = (int(x), int(y)) # radius = int(radius) + 10 # hand_contour = extract_contour_inside_circle(hand_contour, (center, radius)) # hand_bounding_rect = cv2.boundingRect(hand_contour) # return hand_bounding_rect, ((int(x), int(y)), radius), hand_contour # TODO: Move to Utils file @staticmethod def depth_mask_to_image(depth): depth_min = np.min(depth) depth_max = np.max(depth) if depth_max!= depth_min and depth_max>0: image = np.interp(depth, [depth_min, depth_max], [0.0, 255.0], right=255, left=0) else: image = np.zeros(depth.shape, dtype=np.uint8) image = np.array(image, dtype=np.uint8) image = image.reshape(480, 640, 1) return image def _detect_in_frame(self, frame): self._last_frame = frame search_roi = self.get_roi_to_use(frame) # Create contours and mask self._frame_contours, self._frame_mask = self.create_contours_and_mask(frame, search_roi) # get the maximum contour if len(self._frame_contours) > 0 and len(self._frame_mask) > 0: # Get the maximum area contour min_area = 100 hand_contour = None for i in range(len(self._frame_contours)): cnt = self._frame_contours[i] area = cv2.contourArea(cnt) if area > min_area: min_area = area hand_contour = self._frame_contours[i] if hand_contour is not None: # cv2.drawContours(frame, [hand_contour], -1, (0, 255, 255), 2) 
detected_hand_bounding_rect = cv2.boundingRect(hand_contour) detected_hand_x, detected_hand_y, detected_hand_w, detected_hand_h = detected_hand_bounding_rect frame_mask_roi_image = self._frame_mask[search_roi.y:search_roi.y+ search_roi.height, search_roi.x:search_roi.x + search_roi.width] frame_mask_roi_image_contour, _, _ = self.calculate_max_contour(frame_mask_roi_image, to_binary=False) # self._detected is updated inside self.update_hand_with_contour(hand_contour) else: self._detected = False self._detection_status = -1 else: self._detected = False self._detection_status = -2 def calculate_max_contour(self, image, to_binary=True): # if self._debug: # cv2.imshow("Hand: calculate_max_contour, image", image) bounding_rect = None image_roi = None if to_binary: gray_diff = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) _, mask = cv2.threshold(gray_diff, 40, 255, cv2.THRESH_BINARY) else: mask = image # kernel_square = np.ones((11, 11), np.uint8) # kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) # # # Perform morphological transformations to filter out the background noise # # Dilation increase skin color area # # Erosion increase skin color area # dilation = cv2.dilate(mask, kernel_ellipse, iterations=1) # erosion = cv2.erode(dilation, kernel_square, iterations=1) # dilation2 = cv2.dilate(erosion, kernel_ellipse, iterations=1) # filtered = cv2.medianBlur(dilation2.astype(np.uint8), 5) # kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8)) # dilation2 = cv2.dilate(filtered, kernel_ellipse, iterations=1) # kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) # dilation3 = cv2.dilate(filtered, kernel_ellipse, iterations=1) # median = cv2.medianBlur(dilation2, 5) # if self._debug: # cv2.imshow("Hand: calculate_max_contour, median", median) ret, thresh = cv2.threshold(mask, 127, 255, 0) # if self._debug: # cv2.imshow("Hand: calculate_max_contour, thresh", thresh) cnts = None max_area = 100 ci = 0 # Find contours of the filtered frame contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if contours: for i in range(len(contours)): cnt = contours[i] area = cv2.contourArea(cnt) if area > max_area: max_area = area ci = i cnts = contours[ci] bounding_rect = cv2.boundingRect(cnts) x, y, w, h = bounding_rect image_roi = mask[y:y + h, x:x + w] return cnts, bounding_rect, image_roi def update_hand_with_contour(self, hand_contour): """ Attributes of the hand are calculated from the hand contour. TODO: calculate a truth value A score of 100 is the maximum value for the hand truth. 
This value is calculated like this: A hand is expected to have 5 finger tips, 4 intertips, a center of mass :param hand_contour: calculated contour that is expected to describe a hand :return: None """ hull2 = cv2.convexHull(hand_contour, returnPoints=False) # Get defect points defects = cv2.convexityDefects(hand_contour, hull2) if defects is not None: estimated_fingertips_coords, \ estimated_fingertips_indexes, \ estimated_intertips_coords, \ estimated_intertips_indexes = self._calculate_fingertips(hand_contour, defects) is_hand = self.is_hand(estimated_fingertips_coords, estimated_intertips_coords, strict=True) if is_hand: self._fingertips = estimated_fingertips_coords self._intertips = estimated_intertips_coords if len(estimated_fingertips_coords) == 5: fingers_contour = np.take(hand_contour, estimated_fingertips_indexes + estimated_intertips_indexes, axis=0, mode="wrap") bounding_rect, hand_circle, self._contour = self.get_hand_bounding_rect_from_fingers( hand_contour, fingers_contour) # detection roi is set to the bounding rect of the fingers upscaled 20 pixels # self.detection_roi = Roi(bounding_rect) self.detection_roi = Roi(bounding_rect).upscaled(Roi.from_frame(self._last_frame, SIDE.CENTER, 100), 10) if self._debug: to_show = self._last_frame.copy() cv2.drawContours(to_show, [hand_contour], -1, (255, 255, 255), 2) cv2.drawContours(to_show, [fingers_contour], -1, (200, 200, 200), 2) to_show = self.detection_roi.draw_on_frame(to_show) # cv2.rectangle(to_show, (self.detection_roi.y, self.detection_roi.x), (self.detection_roi.y + self.detection_roi.height, self.detection_roi.x + self.detection_roi.width), [255, 255, 0]) # (x, y, w, h) = cv2.boundingRect(hand_contour) # cv2.rectangle(to_show, (self.detection_roi.y, self.detection_roi.x), (self.detection_roi.x + self.detection_roi.height, self.detection_roi.x + self.detection_roi.width), [255, 255, 0]) cv2.imshow("update_hand_with_contour", to_show) self._detected = True self._detection_status = 1 self._ever_detected = True self._confidence = 100 else: self._detection_status = -1 self._detected = False self._confidence = 0 return else: self._detection_status = -1 self._detected = False self._confidence = 0 return # Find moments of the largest contour moments = cv2.moments(hand_contour) center_of_mass = None finger_distances = [] average_defect_distance = None # Central mass of first order moments if moments['m00'] != 0: cx = int(moments['m10'] / moments['m00']) # cx = M10/M00 cy = int(moments['m01'] / moments['m00']) # cy = M01/M00 center_of_mass = (cx, cy) self._center_of_mass = center_of_mass self._position_history.append(center_of_mass) if center_of_mass is not None and len(estimated_intertips_coords) > 0: # Distance from each finger defect(finger webbing) to the center mass distance_between_defects_to_center = [] for far in estimated_intertips_coords: x = np.array(far) center_mass_array = np.array(center_of_mass) distance = np.sqrt( np.power(x[0] - center_mass_array[0], 2) + np.power(x[1] - center_mass_array[1], 2) ) distance_between_defects_to_center.append(distance) # Get an average of three shortest distances from finger webbing to center mass sorted_defects_distances = sorted(distance_between_defects_to_center) average_defect_distance = np.mean(sorted_defects_distances[0:2]) self._average_defect_distance = average_defect_distance # # Get fingertip points from contour hull # # If points are in proximity of 80 pixels, consider as a single point in the group # finger = [] # for i in range(0, len(hull) - 1): # if 
(np.absolute(hull[i][0][0] - hull[i + 1][0][0]) > 10) or ( # np.absolute(hull[i][0][1] - hull[i + 1][0][1]) > 10): # if hull[i][0][1] < 500: # finger.append(hull[i][0]) # # # # The fingertip points are 5 hull points with largest y coordinates # finger = sorted(finger, key=lambda x: x[1]) # fingers = finger[0:5] if center_of_mass is not None and len(estimated_fingertips_coords) > 0: # Calculate distance of each finger tip to the center mass finger_distances = [] for i in range(0, len(estimated_fingertips_coords)): distance = np.sqrt( np.power(estimated_fingertips_coords[i][0] - center_of_mass[0], 2) + np.power( estimated_fingertips_coords[i][1] - center_of_mass[0], 2)) finger_distances.append(distance) self._finger_distances = finger_distances else: self._detection_status = -2 self._detected = False self._confidence = 0 return def _calculate_fingertips(self, hand_contour, defects): intertips_coords = [] intertips_indexes = [] far_defect = [] fingertips_coords = [] fingertips_indexes = [] defect_indices = [] for defect_index in range(defects.shape[0]): s, e, f, d = defects[defect_index, 0] start = tuple(hand_contour[s][0]) end = tuple(hand_contour[e][0]) far = tuple(hand_contour[f][0]) far_defect.append(far) # cv2.line(frame, start, end, [0, 255, 0], 1) a = math.sqrt((end[0] - start[0]) ** 2 + (end[1] - start[1]) ** 2) b = math.sqrt((far[0] - start[0]) ** 2 + (far[1] - start[1]) ** 2) c = math.sqrt((end[0] - far[0]) ** 2 + (end[1] - far[1]) ** 2) angle = math.acos((b ** 2 + c ** 2 - a ** 2) / (2 * b * c)) # cosine theorem # cv2.circle(frame, far, 8, [211, 84, 125], -1) # cv2.circle(frame, start, 8, [0, 84, 125], -1) # cv2.circle(frame, end, 8, [0, 84, 125], -1) # Get tips and intertips coordinates # TODO: ENV_DEPENDENCE: this angle > 90degrees determinate if two points are considered fingertips or not and 90 make thumb to fail in some occasions intertips_max_angle = math.pi / 1.7 if angle <= intertips_max_angle: # angle less than 90 degree, treat as fingers defect_indices.append(defect_index) # cv2.circle(frame, far, 8, [211, 84, 0], -1) intertips_coords.append(far) intertips_indexes.append(f) # cv2.putText(frame, str(s), start, FONT, 0.7, (255, 255, 255), 1) # cv2.putText(frame, str(e), end, FONT, 0.7, (255, 255, 200), 1) if len(fingertips_coords) > 0: from scipy.spatial import distance # calculate distances from start and end to the already known tips start_distance, end_distance = tuple( distance.cdist(fingertips_coords, [start, end]).min(axis=0)) # TODO: ENV_DEPENDENCE: it determinate the pixels distance to consider two points the same. 
It depends on camera resolution and distance from the hand to the camera same_fingertip_radius = 10 if start_distance > same_fingertip_radius: fingertips_coords.append(start) fingertips_indexes.append(s) # cv2.circle(frame, start, 10, [255, 100, 255], 3) if end_distance > same_fingertip_radius: fingertips_coords.append(end) fingertips_indexes.append(e) # cv2.circle(frame, end, 10, [255, 100, 255], 3) else: fingertips_coords.append(start) fingertips_indexes.append(s) # cv2.circle(frame, start, 10, [255, 100, 255], 3) fingertips_coords.append(end) fingertips_indexes.append(e) # cv2.circle(frame, end, 10, [255, 100, 255], 3) # cv2.circle(frame, far, 10, [100, 255, 255], 3) return fingertips_coords, fingertips_indexes, intertips_coords, intertips_indexes # TODO: modify to use a calculated confidence def is_hand(self, fingertips, intertips, strict=True): if strict: return len(fingertips) == 5 and len(intertips) > 2 else: return 5 >= len(fingertips) > 2 # def detect_and_track(self, frame): # """ # Try to detect and track the hand on the given frame # If the hand is not detected the extended_roi is updated which will be used in the next detection # :param frame: # :return: # """ # self._detect_in_frame(frame) # if self._detected: # self._consecutive_detection_fails = 0 # else: # self._consecutive_detection_fails += 1 # self._track_in_frame(frame) # print(self._detected, self._tracked) # # if it's the first time we don't detect in a row... # if self._consecutive_detection_fails == 1: # # if we have a tracking roi we use it # if self._tracked: # self.extended_roi = self.tracking_roi # else: # # if we don't, we use the last detected roi # self.extended_roi = self.detection_roi # elif self._consecutive_detection_fails > 1: # # if it's not the first time we don't detect we just extend the extended roi. # # it's autolimited to the initial Roi # self.extended_roi = self.extended_roi.upscaled(self.initial_roi, 10) # if self._tracked: # self._consecutive_tracking_fails = 0 # else: # self._consecutive_tracking_fails += 1 # self._update_truth_value_by_frame2() def get_roi_to_use(self, frame): """ Calculate the roi to be used depending on the situation of the hand (initial, detected, tracked) :param frame: :return: """ current_roi = None if self._detected: current_roi = self.detection_roi else: # if we already have failed to detect we use the extended_roi if self._consecutive_detection_fails > 0: if self._tracked: current_roi = self.tracking_roi else: current_roi = self.extended_roi else: # Not detected and not consecutive fails on detection. # It's probably the first time we try to detect. 
# If no initial_roi is given an square of 200 x 200 is taken on the center if self.initial_roi is not None and self.initial_roi != Roi(): current_roi = self.initial_roi else: current_roi = Roi.from_frame(frame, SIDE.CENTER, 50) assert current_roi != Roi(), "hand can't be detected on a %s roi of the frame" % str(current_roi) return current_roi def _track_in_frame(self, frame, method="camshift"): self._last_frame = frame # for hand coor in frame to csv xmin = None ymin = None xmax = None ymax = None if self._ever_detected: roi_for_tracking = self.get_roi_to_use(frame) mask = self.create_hand_mask(frame) x, y, w, h = roi_for_tracking # for hand coor in frame to csv xmin = x ymin = y xmax = x + w ymax = y + h track_window = tuple(roi_for_tracking) # set up the ROI for tracking roi = roi_for_tracking.extract_from_frame(frame) if self._debug: print roi_for_tracking cv2.imshow("DEBUG: HandDetection_lib: _track_in_frame (frame_roied)", roi) # fi masked frame is only 1 channel if len(frame.shape) == 2 or (len(frame.shape) == 3 and frame.shape[2] == 1): hsv_roi = cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB) hsv_roi = cv2.cvtColor(hsv_roi, cv2.COLOR_RGB2HSV) hsv = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR) hsv = cv2.cvtColor(hsv, cv2.COLOR_BGR2HSV) else: hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) # mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.))) roi_mask = mask[y:y + h, x:x + w] # if self._debug: # cv2.imshow("DEBUG: HandDetection_lib: follow (ROI extracted mask)", roi_mask) roi_hist = cv2.calcHist([hsv_roi], [0], roi_mask, [180], [0, 180]) cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX) # Setup the termination criteria, either 10 iteration or move by atleast 1 pt term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1) dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1) # apply meanshift to get the new location if method == "meanshift": tracked, new_track_window = cv2.meanShift(dst, track_window, term_crit) self._tracked = (tracked != 0) else: rotated_rect, new_track_window = cv2.CamShift(dst, track_window, term_crit) intersection_rate = roi_for_tracking.intersection_rate(Roi(new_track_window)) if intersection_rate and roi_for_tracking != Roi(new_track_window): self._tracked = True else: self._tracked = False if self._tracked: self.tracking_roi = Roi(new_track_window) else: self._tracked = False # for hand coor in frame to csv return xmin, ymin, xmax, ymax
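# ---------------------------------------------------------------------------
# Minimal, self-contained sketch of the histogram-backprojection + CamShift
# pattern that _track_in_frame() above implements, reduced to plain OpenCV
# calls on a webcam. The initial window and the skin-tone inRange bounds are
# illustrative assumptions, not values taken from this library.
# ---------------------------------------------------------------------------
import cv2
import numpy as np

cap = cv2.VideoCapture(0)
ok, frame = cap.read()
x, y, w, h = 220, 140, 200, 200          # assumed initial search window
track_window = (x, y, w, h)

roi = frame[y:y + h, x:x + w]
hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
# crude skin-tone mask so the hue histogram mostly ignores the background
mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.)))
roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
# stop after 10 iterations or when the window moves by less than 1 px
term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    # back-project the hue histogram and let CamShift re-centre the window
    dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)
    rotated_rect, track_window = cv2.CamShift(dst, track_window, term_crit)
    pts = cv2.boxPoints(rotated_rect).astype(np.int32)
    cv2.polylines(frame, [pts], True, (0, 255, 255), 2)
    cv2.imshow("camshift sketch", frame)
    if cv2.waitKey(30) & 0xFF == 27:      # Esc to quit
        break

cap.release()
cv2.destroyAllWindows()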
def elaborate(self, platform): # VGA constants pixel_f = self.timing.pixel_freq hsync_front_porch = self.timing.h_front_porch hsync_pulse_width = self.timing.h_sync_pulse hsync_back_porch = self.timing.h_back_porch vsync_front_porch = self.timing.v_front_porch vsync_pulse_width = self.timing.v_sync_pulse vsync_back_porch = self.timing.v_back_porch # Pins clk25 = platform.request("clk25") ov7670 = platform.request("ov7670") led = [platform.request("led", i) for i in range(8)] leds = Cat([i.o for i in led]) led8_2 = platform.request("led8_2") leds8_2 = Cat([led8_2.leds[i] for i in range(8)]) led8_3 = platform.request("led8_3") leds8_3 = Cat([led8_3.leds[i] for i in range(8)]) leds16 = Cat(leds8_3, leds8_2) btn1 = platform.request("button_fire", 0) btn2 = platform.request("button_fire", 1) up = platform.request("button_up", 0) down = platform.request("button_down", 0) pwr = platform.request("button_pwr", 0) left = platform.request("button_left", 0) right = platform.request("button_right", 0) sw = Cat([platform.request("switch",i) for i in range(4)]) uart = platform.request("uart") divisor = int(platform.default_clk_frequency // 460800) esp32 = platform.request("esp32_spi") csn = esp32.csn sclk = esp32.sclk copi = esp32.copi cipo = esp32.cipo m = Module() # Clock generator. m.domains.sync = cd_sync = ClockDomain("sync") m.domains.pixel = cd_pixel = ClockDomain("pixel") m.domains.shift = cd_shift = ClockDomain("shift") m.submodules.ecp5pll = pll = ECP5PLL() pll.register_clkin(clk25, platform.default_clk_frequency) pll.create_clkout(cd_sync, platform.default_clk_frequency) pll.create_clkout(cd_pixel, pixel_f) pll.create_clkout(cd_shift, pixel_f * 5.0 * (1.0 if self.ddr else 2.0)) # Add CamRead submodule camread = CamRead() m.submodules.camread = camread # Camera config cam_x_res = 640 cam_y_res = 480 camconfig = CamConfig() m.submodules.camconfig = camconfig # Connect the camera pins and config and read modules m.d.comb += [ ov7670.cam_RESET.eq(1), ov7670.cam_PWON.eq(0), ov7670.cam_XCLK.eq(clk25.i), ov7670.cam_SIOC.eq(camconfig.sioc), ov7670.cam_SIOD.eq(camconfig.siod), camconfig.start.eq(btn1), camread.p_data.eq(Cat([ov7670.cam_data[i] for i in range(8)])), camread.href.eq(ov7670.cam_HREF), camread.vsync.eq(ov7670.cam_VSYNC), camread.p_clock.eq(ov7670.cam_PCLK) ] # Create the uart m.submodules.serial = serial = AsyncSerial(divisor=divisor, pins=uart) # Input fifo fifo_depth=1024 m.submodules.fifo = fifo = SyncFIFOBuffered(width=16,depth=fifo_depth) # Frame buffer x_res= cam_x_res // 2 y_res= cam_y_res buffer = Memory(width=16, depth=x_res * y_res) m.submodules.r = r = buffer.read_port() m.submodules.w = w = buffer.write_port() # Button debouncers m.submodules.debup = debup = Debouncer() m.submodules.debdown = debdown = Debouncer() m.submodules.debosd = debosd = Debouncer() m.submodules.debsel = debsel = Debouncer() m.submodules.debsnap = debsnap = Debouncer() m.submodules.debhist = debhist = Debouncer() # Connect the buttons to debouncers m.d.comb += [ debup.btn.eq(up), debdown.btn.eq(down), debosd.btn.eq(pwr), debsel.btn.eq(right), debsnap.btn.eq(left), debhist.btn.eq(btn2) ] # Image processing options flip = Signal(2, reset=1) mono = Signal(reset=0) invert = Signal(reset=0) gamma = Signal(reset=0) border = Signal(reset=0) filt = Signal(reset=0) grid = Signal(reset=0) histo = Signal(reset=1) hbin = Signal(6, reset=0) bin_cnt = Signal(5, reset=0) thresh = Signal(reset=0) threshold = Signal(8, reset=0) hist_chan = Signal(2, reset=0) ccc = CC(reset=(0,18,12,16)) sharpness = Signal(unsigned(4), 
reset=0) osd_val = Signal(4, reset=0) # Account for spurious start-up button pushes osd_on = Signal(reset=1) osd_sel = Signal(reset=1) snap = Signal(reset=0) frozen = Signal(reset=1) writing = Signal(reset=0) written = Signal(reset=0) byte = Signal(reset=0) w_addr = Signal(18) # Color filter l = Rgb565(reset=(18,12,6)) # Initialised to red LEGO filter h = Rgb565(reset=(21,22,14)) # Region of interest roi = Roi() # VGA signals vga_r = Signal(8) vga_g = Signal(8) vga_b = Signal(8) vga_hsync = Signal() vga_vsync = Signal() vga_blank = Signal() # Fifo co-ordinates f_x = Signal(9) f_y = Signal(9) f_frame_done = Signal() # Pixel from fifo pix = Rgb565() # SPI memory for remote configuration m.submodules.spimem = spimem = SpiMem(addr_bits=32) # Color Control m.submodules.cc = cc = ColorControl() # Image convolution m.submodules.imc = imc = ImageConv() # Statistics m.submodules.stats = stats = Stats() # Histogram m.submodules.hist = hist = Hist() # Filter m.submodules.fil = fil = Filt() # Monochrome m.submodules.mon = mon = Mono() # Sync the fifo with the camera sync_fifo = Signal(reset=0) with m.If(~sync_fifo & ~fifo.r_rdy & (camread.col == cam_x_res - 1) & (camread.row == cam_y_res -1)): m.d.sync += [ sync_fifo.eq(1), f_x.eq(0), f_y.eq(0) ] with m.If(btn1): m.d.sync += sync_fifo.eq(0) # Connect the fifo m.d.comb += [ fifo.w_en.eq(camread.pixel_valid & camread.col[0] & sync_fifo), # Only write every other pixel fifo.w_data.eq(camread.pixel_data), fifo.r_en.eq(fifo.r_rdy & ~imc.o_stall) ] # Calculate fifo co-ordinates m.d.sync += f_frame_done.eq(0) with m.If(fifo.r_en & sync_fifo): m.d.sync += f_x.eq(f_x + 1) with m.If(f_x == x_res - 1): m.d.sync += [ f_x.eq(0), f_y.eq(f_y + 1) ] with m.If(f_y == y_res - 1): m.d.sync += [ f_y.eq(0), f_frame_done.eq(1) ] # Extract pixel from fifo data m.d.comb += [ pix.r.eq(fifo.r_data[11:]), pix.g.eq(fifo.r_data[5:11]), pix.b.eq(fifo.r_data[:5]) ] # Connect color control m.d.comb += [ cc.i.eq(pix), cc.i_cc.eq(ccc) ] # Calculate per-frame statistics, after applying color correction m.d.comb += [ stats.i.eq(cc.o), stats.i_valid.eq(fifo.r_rdy), # This is not valid when a region of interest is active stats.i_avg_valid.eq((f_x >= 32) & (f_x < 288) & (f_y >= 112) & (f_y < 368)), stats.i_frame_done.eq(f_frame_done), stats.i_x.eq(f_x), stats.i_y.eq(f_y), stats.i_roi.eq(roi) ] # Produce histogram, after applying color correction, and after monochrome, for monochrome histogram with m.Switch(hist_chan): with m.Case(0): m.d.comb += hist.i_p.eq(cc.o.r) with m.Case(1): m.d.comb += hist.i_p.eq(cc.o.g) with m.Case(2): m.d.comb += hist.i_p.eq(cc.o.b) with m.Case(3): m.d.comb += hist.i_p.eq(mon.o_m) m.d.comb += [ hist.i_valid.eq(fifo.r_rdy), hist.i_clear.eq(f_frame_done), hist.i_x.eq(f_x), hist.i_y.eq(f_y), hist.i_roi.eq(roi), hist.i_bin.eq(hbin) # Used when displaying histogram ] # Apply filter, after color correction m.d.comb += [ fil.i.eq(cc.o), fil.i_valid.eq(fifo.r_en), fil.i_en.eq(filt), fil.i_frame_done.eq(f_frame_done), fil.i_l.eq(l), fil.i_h.eq(h) ] # Apply mono, after color correction and filter m.d.comb += [ mon.i.eq(fil.o), mon.i_en.eq(mono), mon.i_invert.eq(invert), mon.i_thresh.eq(thresh), mon.i_threshold.eq(threshold) ] # Apply image convolution, after other transformations m.d.comb += [ imc.i.eq(mon.o), imc.i_valid.eq(fifo.r_rdy), imc.i_reset.eq(~sync_fifo), # Select image convolution imc.i_sel.eq(sharpness) ] # Take a snapshot, freeze the camera, and write the framebuffer to the uart # Note that this suspends video output with m.If(debsnap.btn_down | (spimem.wr & 
(spimem.addr == 22))): with m.If(frozen): m.d.sync += frozen.eq(0) with m.Else(): m.d.sync += [ snap.eq(1), frozen.eq(0), w_addr.eq(0), written.eq(0), byte.eq(0) ] # Wait to end of frame after requesting snapshot, before start of writing to uart with m.If(imc.o_frame_done & snap): m.d.sync += [ frozen.eq(1), snap.eq(0) ] with m.If(~written): m.d.sync += writing.eq(1) # Connect the uart m.d.comb += [ serial.tx.data.eq(Mux(byte, r.data[8:], r.data[:8])), serial.tx.ack.eq(writing) ] # Write to the uart from frame buffer (affects video output) with m.If(writing): with m.If(w_addr == x_res * y_res): m.d.sync += [ writing.eq(0), written.eq(1) ] with m.Elif(serial.tx.ack & serial.tx.rdy): m.d.sync += byte.eq(~byte) with m.If(byte): m.d.sync += w_addr.eq(w_addr+1) # Connect spimem m.d.comb += [ spimem.csn.eq(~csn), spimem.sclk.eq(sclk), spimem.copi.eq(copi), cipo.eq(spimem.cipo), ] # Writable configuration registers spi_wr_vals = Array([ccc.brightness, ccc.redness, ccc.greenness, ccc.blueness, l.r, h.r, l.g, h.g, l.b, h.b, sharpness, filt, border, mono, invert, grid, histo, roi.x[1:], roi.y[1:], roi.w[1:], roi.h[1:], roi.en, None, None, None, threshold, thresh, hist_chan, flip, None, None, None, None, None, None, None, None, None, frozen]) with m.If(spimem.wr): with m.Switch(spimem.addr): for i in range(len(spi_wr_vals)): if spi_wr_vals[i] is not None: with m.Case(i): m.d.sync += spi_wr_vals[i].eq(spimem.dout) # Readable configuration registers spi_rd_vals = Array([ccc.brightness, ccc.redness, ccc.greenness, ccc.blueness, l.r, h.r, l.g, h.g, l.b, h.b, sharpness, filt, border, mono, invert, grid, histo, roi.x[1:], roi.y[1:], roi.w[1:], roi.h[1:], roi.en, fil.o_nz[16:], fil.o_nz[8:16], fil.o_nz[:8], threshold, thresh, hist_chan, flip, stats.o_min.r, stats.o_min.g, stats.o_min.b, stats.o_max.r, stats.o_max.g, stats.o_max.b, stats.o_avg.r, stats.o_avg.g, stats.o_avg.b, frozen, writing, written]) with m.If(spimem.rd): with m.Switch(spimem.addr): for i in range(len(spi_rd_vals)): with m.Case(i): m.d.sync += spimem.din.eq(spi_rd_vals[i]) # Add VGA generator m.submodules.vga = vga = VGA( resolution_x = self.timing.x, hsync_front_porch = hsync_front_porch, hsync_pulse = hsync_pulse_width, hsync_back_porch = hsync_back_porch, resolution_y = self.timing.y, vsync_front_porch = vsync_front_porch, vsync_pulse = vsync_pulse_width, vsync_back_porch = vsync_back_porch, bits_x = 16, # Play around with the sizes because sometimes bits_y = 16 # a smaller/larger value will make it pass timing. 
) # Fetch histogram for display old_x = Signal(10) m.d.sync += old_x.eq(vga.o_beam_x) with m.If(vga.o_beam_x == 0): m.d.sync += [ hbin.eq(0), bin_cnt.eq(0) ] with m.Elif(vga.o_beam_x != old_x): m.d.sync += bin_cnt.eq(bin_cnt+1) with m.If(bin_cnt == 19): m.d.sync += [ bin_cnt.eq(0), hbin.eq(hbin+1) ] # Switch between camera and histogram view with m.If(debhist.btn_down): m.d.sync += histo.eq(~histo) # Connect frame buffer, with optional x and y flip x = Signal(10) y = Signal(9) m.d.comb += [ w.en.eq(imc.o_valid & ~frozen), w.addr.eq(imc.o_y * x_res + imc.o_x), w.data.eq(Cat(imc.o.b, imc.o.g, imc.o.r)), y.eq(Mux(flip[1], y_res - 1 - vga.o_beam_y, vga.o_beam_y)), x.eq(Mux(flip[0], x_res - 1 - vga.o_beam_x[1:], vga.o_beam_x[1:])), r.addr.eq(Mux(writing, w_addr, y * x_res + x)) ] # Apply the On-Screen Display (OSD) m.submodules.osd = osd = OSD() hist_col = Signal(8) m.d.comb += [ osd.x.eq(vga.o_beam_x), osd.y.eq(vga.o_beam_y), hist_col.eq(Mux((479 - osd.y) < hist.o_val[8:], 0xff, 0x00)), osd.i_r.eq(Mux(histo, Mux((hist_chan == 0) | (hist_chan == 3), hist_col, 0), Cat(Const(0, unsigned(3)), r.data[11:16]))), osd.i_g.eq(Mux(histo, Mux((hist_chan == 1) | (hist_chan == 3), hist_col, 0), Cat(Const(0, unsigned(2)), r.data[5:11]))), osd.i_b.eq(Mux(histo, Mux((hist_chan == 2) | (hist_chan == 3), hist_col, 0), Cat(Const(0, unsigned(3)), r.data[0:5]))), osd.on.eq(osd_on), osd.osd_val.eq(osd_val), osd.sel.eq(osd_sel), osd.grid.eq(grid), osd.border.eq(border), osd.roi.eq(roi.en & ~histo), osd.roi_x.eq(roi.x), osd.roi_y.eq(roi.y), osd.roi_w.eq(roi.w), osd.roi_h.eq(roi.h) ] # OSD control osd_vals = Array([ccc.brightness, ccc.redness, ccc.greenness, ccc.blueness, mono, flip[0], flip[1], border, sharpness, invert, grid, filt]) with m.If(debosd.btn_down): m.d.sync += osd_on.eq(~osd_on) with m.If(osd_on): with m.If(debsel.btn_down): m.d.sync += osd_sel.eq(~osd_sel) with m.If(debup.btn_down): with m.If(~osd_sel): m.d.sync += osd_val.eq(Mux(osd_val == 0, 11, osd_val-1)) with m.Else(): with m.Switch(osd_val): for i in range(len(osd_vals)): with m.Case(i): if (len(osd_vals[i]) == 1): m.d.sync += osd_vals[i].eq(1) else: m.d.sync += osd_vals[i].eq(osd_vals[i]+1) with m.If(debdown.btn_down): with m.If(~osd_sel): m.d.sync += osd_val.eq(Mux(osd_val == 11, 0, osd_val+1)) with m.Else(): with m.Switch(osd_val): for i in range(len(osd_vals)): with m.Case(i): if (len(osd_vals[i]) == 1): m.d.sync += osd_vals[i].eq(0) else: m.d.sync += osd_vals[i].eq(osd_vals[i]-1) # Show configuration values on leds with m.Switch(osd_val): for i in range(len(osd_vals)): with m.Case(i): m.d.comb += leds.eq(osd_vals[i]) # Generate VGA signals m.d.comb += [ vga.i_clk_en.eq(1), vga.i_test_picture.eq(0), vga.i_r.eq(osd.o_r), vga.i_g.eq(osd.o_g), vga.i_b.eq(osd.o_b), vga_r.eq(vga.o_vga_r), vga_g.eq(vga.o_vga_g), vga_b.eq(vga.o_vga_b), vga_hsync.eq(vga.o_vga_hsync), vga_vsync.eq(vga.o_vga_vsync), vga_blank.eq(vga.o_vga_blank), ] # VGA to digital video converter. tmds = [Signal(2) for i in range(4)] m.submodules.vga2dvid = vga2dvid = VGA2DVID(ddr=self.ddr, shift_clock_synchronizer=False) m.d.comb += [ vga2dvid.i_red.eq(vga_r), vga2dvid.i_green.eq(vga_g), vga2dvid.i_blue.eq(vga_b), vga2dvid.i_hsync.eq(vga_hsync), vga2dvid.i_vsync.eq(vga_vsync), vga2dvid.i_blank.eq(vga_blank), tmds[3].eq(vga2dvid.o_clk), tmds[2].eq(vga2dvid.o_red), tmds[1].eq(vga2dvid.o_green), tmds[0].eq(vga2dvid.o_blue), ] # GPDI pins if (self.ddr): # Vendor specific DDR modules. # Convert SDR 2-bit input to DDR clocked 1-bit output (single-ended) # onboard GPDI. 
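# Each ODDRX1F primitive below pushes two TMDS bits per shift-clock cycle
# (i_D0 on one clock edge, i_D1 on the other), which is why the shift clock is
# generated above at 5x the pixel clock in DDR mode rather than the 10x an
# SDR serializer would need.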
m.submodules.ddr0_clock = Instance("ODDRX1F", i_SCLK = ClockSignal("shift"), i_RST = 0b0, i_D0 = tmds[3][0], i_D1 = tmds[3][1], o_Q = self.o_gpdi_dp[3]) m.submodules.ddr0_red = Instance("ODDRX1F", i_SCLK = ClockSignal("shift"), i_RST = 0b0, i_D0 = tmds[2][0], i_D1 = tmds[2][1], o_Q = self.o_gpdi_dp[2]) m.submodules.ddr0_green = Instance("ODDRX1F", i_SCLK = ClockSignal("shift"), i_RST = 0b0, i_D0 = tmds[1][0], i_D1 = tmds[1][1], o_Q = self.o_gpdi_dp[1]) m.submodules.ddr0_blue = Instance("ODDRX1F", i_SCLK = ClockSignal("shift"), i_RST = 0b0, i_D0 = tmds[0][0], i_D1 = tmds[0][1], o_Q = self.o_gpdi_dp[0]) else: m.d.comb += [ self.o_gpdi_dp[3].eq(tmds[3][0]), self.o_gpdi_dp[2].eq(tmds[2][0]), self.o_gpdi_dp[1].eq(tmds[1][0]), self.o_gpdi_dp[0].eq(tmds[0][0]), ] return m
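# ---------------------------------------------------------------------------
# Hedged host-side companion sketch (not part of the gateware): after a
# snapshot, the frame buffer is streamed out of the UART one pixel at a time,
# low byte first, as 320x480 RGB565 (matching x_res = cam_x_res // 2 and the
# byte-toggling write logic above, as I read it). The serial port name and
# timeout are assumptions; the baud rate matches the 460800 divisor above.
# ---------------------------------------------------------------------------
import numpy as np
import serial

X_RES, Y_RES = 320, 480
N_BYTES = X_RES * Y_RES * 2

with serial.Serial("/dev/ttyUSB0", 460800, timeout=60) as port:
    raw = port.read(N_BYTES)

pix = np.frombuffer(raw, dtype="<u2").reshape(Y_RES, X_RES)   # little-endian RGB565
r = ((pix >> 11) & 0x1F).astype(np.uint8) << 3                # 5-bit red   -> 8-bit
g = ((pix >> 5) & 0x3F).astype(np.uint8) << 2                 # 6-bit green -> 8-bit
b = (pix & 0x1F).astype(np.uint8) << 3                        # 5-bit blue  -> 8-bit
rgb = np.dstack([r, g, b])

with open("snapshot.ppm", "wb") as f:                         # simple binary PPM output
    f.write(b"P6\n%d %d\n255\n" % (X_RES, Y_RES))
    f.write(rgb.tobytes())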