def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # Actual detection. Variable boxes contains the bounding box coordinates
            # for hands detected, while scores contains the confidence for each of
            # these boxes.
            # Hint: If len(boxes) > 1, you may assume you have found at least one
            # hand (within your score threshold)
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.get_pic(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)
            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
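# A minimal sketch of how a worker like the one above is typically wired up,
# following the multi-threaded pattern from the victordibia/handtracking
# examples. The queue sizes, camera index, and cap_params values here are
# assumptions, not values taken from the snippet itself.
from multiprocessing import Queue, Pool
import cv2

if __name__ == '__main__':
    input_q = Queue(maxsize=5)
    output_q = Queue(maxsize=5)
    cap = cv2.VideoCapture(0)
    cap_params = {
        'im_width': cap.get(3),
        'im_height': cap.get(4),
        'num_hands_detect': 2,
        'score_thresh': 0.2,
    }
    # spawn a pool of workers; each loads its own copy of the frozen graph
    pool = Pool(4, worker, (input_q, output_q, cap_params, 0))
    while True:
        ret, frame = cap.read()
        input_q.put(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        annotated = output_q.get()
        cv2.imshow('detection', cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    pool.terminate()
    cap.release()
    cv2.destroyAllWindows()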
def __init__(self, haar_cascade_filepath, parent=None):
    ############################### Face initialization #######################
    super().__init__(parent)
    self.classifier = cv2.CascadeClassifier(haar_cascade_filepath)
    self.image = QtGui.QImage()
    self._red = (0, 0, 255)
    self._width = 2
    self._min_size = (50, 50)
    self.l1 = QtWidgets.QLabel()
    ############################### Hand initialization #######################
    self.detection_graph, self.sess = detector_utils.load_inference_graph()
def worker(input_q, output_q, cropped_output_q, inferences_q, cap_params,
           frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            "./cnn/cnn.h5")
    except Exception as e:
        print(e)

    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # Actual detection. Variable boxes contains the bounding box coordinates
            # for hands detected, while scores contains the confidence for each of
            # these boxes.
            # Hint: If len(boxes) > 1, you may assume you have found at least one
            # hand (within your score threshold)
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # draw bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # classify hand pose on a 28x28 grayscale crop
            if res is not None:
                res = cv2.cvtColor(res, cv2.COLOR_RGB2GRAY)
                res = cv2.resize(res, (28, 28), interpolation=cv2.INTER_AREA)
                class_res = classifier.classify(model, classification_graph,
                                                session, res)
                inferences_q.put(class_res)

            # add frame annotated with bounding box to queue
            cropped_output_q.put(res)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
def worker(input_q, output_q, cap_params, frame_processed, points):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    prevPoint = None
    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # Actual detection. Variable boxes contains the bounding box coordinates
            # for hands detected, while scores contains the confidence for each of
            # these boxes.
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # draw bounding boxes
            detectedPoint = detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)
            point = detectedPoint
            if detectedPoint is not None:
                if prevPoint is not None:
                    # report the point only when the hand crosses from above
                    # the threshold line to below it between two frames
                    if aboveThreshhold(prevPoint, cap_params["im_height"]) \
                            and not aboveThreshhold(detectedPoint,
                                                    cap_params["im_height"]):
                        print("Detected: " + str(detectedPoint) +
                              " Prev: " + str(prevPoint))
                        print("Under")
                        points.put(detectedPoint)
                        # call circle collision handling here, e.g.
                        # callHits.get()(detectedPoint[0],
                        #                cap_params["im_height"],
                        #                cap_params["im_width"])
                prevPoint = detectedPoint

            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
def worker(input_q, output_q, cap_params, frame_processed):
    global called, c, ges, score
    if not called:
        gesture.load_model()
        called = True
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    while True:
        print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            details = {'boxes': boxes, 'scores': scores}

            # draw bounding boxes
            cropped_image = detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['width'], cap_params['height'],
                frame)

            # classify the normalized crop on every 10th frame only
            if cropped_image is not None and c == 0:
                cropped_image = cv2.flip(cropped_image, 1)
                ges, score = gesture.predict(cropped_image / 255)
                print(ges, score)

            details['frame'] = frame
            details['cropped_image'] = cropped_image
            details['ges'] = ges
            details['score'] = score
            output_q.put(details)
            frame_processed += 1
        else:
            output_q.put({'boxes': [], 'frame': frame, 'ges': ges,
                          'score': score})
        c = (c + 1) % 10
    sess.close()
def __init__(self):
    rospy.init_node('image_detection')
    self.bridge = CvBridge()
    self.inference_graph, self.sess = detector_utils.load_inference_graph()
    self.subscriber = rospy.Subscriber(
        '/camera/front/left/image_rect_color', Image, self.img_callback)
    self.publisher = rospy.Publisher('/vision/dice/debug_rcnn', Image,
                                     queue_size=1)
    # Parameters
    self.num_frames = 0
    self.num_objects_detect = 1
    self.score_thresh = 0.1
    self.im_width = 1920
    self.im_height = 1080
    self.start_time = datetime.datetime.now()
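# A minimal sketch of the img_callback this node presumably pairs with (a
# method of the same class): convert the ROS Image to an OpenCV array, run the
# detector, and republish an annotated debug image. The drawing call and
# encodings are assumptions, not shown in the snippet.
def img_callback(self, msg):
    frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
    boxes, scores = detector_utils.detect_objects(frame, self.inference_graph,
                                                  self.sess)
    detector_utils.draw_box_on_image(self.num_objects_detect,
                                     self.score_thresh, scores, boxes,
                                     self.im_width, self.im_height, frame)
    self.num_frames += 1
    self.publisher.publish(self.bridge.cv2_to_imgmsg(frame, encoding='rgb8'))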
def worker(input_q, output_q, boxes_q, inferences_q, cap_params,
           frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            "cnn/models/hand_poses_wGarbage_10.h5")
    except Exception as e:
        print(e)

    try:
        while True:
            # print("> ===== in worker loop, frame ", frame_processed)
            frame = input_q.get()
            if frame is not None:
                # Actual detection. Variable boxes contains the bounding box
                # coordinates for hands detected, while scores contains the
                # confidence for each of these boxes.
                # Hint: If len(boxes) > 1, you may assume you have found at
                # least one hand (within your score threshold)
                boxes, scores = detector.detect_objects(
                    frame, detection_graph, sess)

                # get region of interest
                box, res = detector.get_box_image(
                    cap_params['num_hands_detect'],
                    cap_params["score_thresh"], scores, boxes,
                    cap_params['im_width'], cap_params['im_height'], frame)

                # classify hand pose
                if res is not None:
                    class_res = classifier.classify(model,
                                                    classification_graph,
                                                    session, res)
                    inferences_q.put(class_res)

                output_q.put(frame)
                boxes_q.put(box)
                frame_processed += 1
            else:
                output_q.put(frame)
    except Exception as e:
        print(e)
    sess.close()
def Worker(input_q, frameNumber_q, worker_q, worker_frame_q, id):
    global TERMINATE
    idleTimer = time.time()   # Saves time at start of last frame
    PROCESSING = False        # Indicates a video stream is being processed
    msg_timer = time.time()

    # Load hand recognition graph
    detection_graph, sess = detector_utils.load_inference_graph()

    # exit once a stream has been processed and no frame arrived for 10 s
    while not (PROCESSING and (time.time() - idleTimer) > 10):
        if not input_q.empty():
            PROCESSING = True  # Becomes true once at least one frame has been received
            idleTimer = time.time()

        input_cv.acquire()
        while input_q.empty():
            if TERMINATE:
                input_cv.release()
                break
            input_cv.wait()
        if TERMINATE:
            break
        image_np = input_q.get()
        frameNumber = frameNumber_q.get()
        input_cv.notifyAll()
        input_cv.release()

        # Convert frame to correct color format
        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        # Detect hands
        boxes, scores = detector_utils.detect_objects(image_np,
                                                      detection_graph, sess)

        # emit results in frame order
        frame_cv.acquire()
        while frameNumber != next_frame:
            frame_cv.wait()
        worker_q.put([boxes, scores])
        if DISPLAY_OUTPUT or SAVE_OUTPUT:
            worker_frame_q.put(image_np)
        frame_cv.release()
    print("Worker " + str(id) + " exited")
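# The Worker above depends on module-level synchronization state that the
# snippet does not show. A plausible setup, assuming threading.Condition
# objects and a shared frame counter (only the names come from the code above;
# the values are assumptions):
import threading

TERMINATE = False
DISPLAY_OUTPUT = True
SAVE_OUTPUT = False
next_frame = 0                    # frame number whose results may be emitted next
input_cv = threading.Condition()  # guards input_q / frameNumber_q
frame_cv = threading.Condition()  # keeps output in frame order; a consumer
                                  # would increment next_frame and call
                                  # frame_cv.notify_all() after each result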
def __init__(self):
    rospy.init_node('dice_detection')
    self.bridge = CvBridge()
    self.subscriber = rospy.Subscriber(
        '/camera/front/left/image_rect_color', Image, self.img_callback)
    self.publisher = rospy.Publisher(
        '/dice/debug_rcnn', Image, queue_size=1)
    self.dice_publisher = rospy.Publisher(
        '/dice/points', Point, queue_size=1)
    self.inference_graph, self.sess = detector_utils.load_inference_graph()

    # Parameters
    self.num_frames = 0
    self.num_objects_detect = 4
    self.score_thresh = 0.1
    self.im_width = 1920
    self.im_height = 1080
    self.start_time = datetime.datetime.now()
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)
            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
def worker_hand(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph(
        r'/home/testuser/tf_tut/obj_detection/handtracking/hand_inference_graph/frozen_inference_graph.pb'
    )
    while True:
        print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
            break
    sess.close()
def gesture(st="y"):
    detection_graph, sess = detector_utils.load_inference_graph()
    parser = argparse.ArgumentParser()
    parser.add_argument('-sth', '--scorethreshold', dest='score_thresh',
                        type=float, default=0.2,
                        help='Score threshold for displaying bounding boxes')
    parser.add_argument('-fps', '--fps', dest='fps', type=int, default=1,
                        help='Show FPS on detection/display visualization')
    parser.add_argument('-src', '--source', dest='video_source', default=0,
                        help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int, default=320,
                        help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int, default=180,
                        help='Height of the frames in the video stream.')
    parser.add_argument('-ds', '--display', dest='display', type=int, default=1,
                        help='Display the detected images using OpenCV. This reduces FPS')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers',
                        type=int, default=4, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size',
                        type=int, default=5, help='Size of the queue.')
    args = parser.parse_args()

    # globals used for the on-screen text and application state
    global handin
    global txt, txtLoad, txtDelay, txtRecord, txtDel, txtProbability
    global font
    global to_reload
    global app
    global currentState

    # set up RealSense
    pipeline = rs.pipeline()
    K.clear_session()

    testDir = "videoTest/"
    img_rows, img_cols, maxFrames = 50, 50, 55
    depthFrame = 0

    # crop parameters
    xupam = 350
    yupam = 200
    xdpam = 250
    ydpam = 300

    cameraHeightR = 480
    cameraWidthR = 848
    frameRateR = 60

    classGest = ['1', '12', '13', '14', '15', '3', '4', '5', '8', '9']
    nameGest = ['call', 'scroll up', 'scroll down', 'right', 'left', 'like',
                'play/pause', 'close', 'click', 'back']
    delayGest = 10
    delayBol = False
    framearray = []
    ctrDelay = 0
    channel = 1
    gestCatch = False
    gestStart = False
    backgroundRemove = True
    x, y, w, h = 0, 0, 0, 0
    count = 0
    boxCounter = True

    # load the model and weights
    json_file = open('3dcnnresult/34/3dcnnmodel.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("3dcnnresult/34/3dcnnmodel.hd5")
    loaded_model.compile(loss=categorical_crossentropy, optimizer='adam',
                         metrics=['accuracy'])

    # inspect the last dense layer so its weights can be re-weighted per state
    conf = loaded_model.model.get_config()
    shapeInput, ap = loaded_model.model.get_layer(name="dense_2").input_shape
    shapeOutput, sp = loaded_model.model.get_layer(name="dense_2").output_shape
    weightDense2 = loaded_model.model.get_layer(name="dense_2").get_weights()[0]
    weightDense22I = loaded_model.model.get_layer(name="dense_2").get_weights()[1]
    weightDense22A = loaded_model.model.get_layer(name="dense_2").get_weights()[1]

    # set up OpenCV face detection
    face_cascade = cv2.CascadeClassifier(
        'haarcascades/haarcascade_frontalface_alt2.xml')

    # pick the FSM that matches the target application
    if st == "x":
        ytfsmfile = "youtube_fsm.txt"
    else:
        ytfsmfile = "fb_fsm.txt"
    ytFsmTable = loadFSM(ytfsmfile)

    updatedWeight = False
    updatedWeight2 = False

    config = rs.config()
    config.enable_stream(rs.stream.depth, cameraWidthR, cameraHeightR,
                         rs.format.z16, frameRateR)
    config.enable_stream(rs.stream.color, cameraWidthR, cameraHeightR,
                         rs.format.bgr8, frameRateR)

    stat = 0

    # Start streaming
    profile = pipeline.start(config)

    # Get the depth sensor's depth scale (see rs-align example for explanation)
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()

    # We will be removing the background of objects more than
    # clipping_distance_in_meters meters away
    clipping_distance_in_meters = 2
    clipping_distance = clipping_distance_in_meters / depth_scale

    # for text purposes
    imgTxt = np.zeros((480, 400, 3), np.uint8)
    txt = "OpenCV"
    txtLoad = "["
    txtDelay = "["
    txtRecord = "Capture"
    txtDel = "Delay"
    txtProbability = "0%"
    font = cv2.FONT_HERSHEY_SIMPLEX

    align_to = rs.stream.color
    align = rs.align(align_to)

    count = 0
    num_frames = 0
    im_width, im_height = (848, 480)
    # max number of hands we want to detect/track
    num_hands_detect = 1
    score_thresh = 0.37

    while True:
        dataImg = []
        # Wait for a coherent, aligned pair of frames: depth and color
        try:
            frames = pipeline.wait_for_frames()
            aligned_frames = align.process(frames)
            aligned_depth_frame = aligned_frames.get_depth_frame()
            color_frame = aligned_frames.get_color_frame()
            depth_image = np.asanyarray(aligned_depth_frame.get_data())
            color_image = np.asanyarray(color_frame.get_data())
            # Validate that both frames are valid
            if not aligned_depth_frame or not color_frame:
                continue
        except:
            pipeline.stop()
            print("Error in wait_for_frames")
            gesture()

        num_frames += 1
        image_np = color_image
        try:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        except:
            print("Error converting to RGB")

        # a hand inside the frame starts the gesture capture
        boxes, scores = detector_utils.detect_objects(image_np,
                                                      detection_graph, sess)
        for i in range(num_hands_detect):
            if scores[i] > score_thresh:
                handin = True
                break
        if handin == True:
            gestStart = True
        else:
            gestStart = False

        if backgroundRemove == True:
            # Remove background - set pixels further than clipping_distance to grey
            grey_color = 0
            # depth image is 1 channel, color is 3 channels
            depth_image_3d = np.dstack((depth_image, depth_image, depth_image))
            bg_removed = np.where(
                (depth_image_3d > clipping_distance) | (depth_image_3d <= 0),
                grey_color, color_image)
            color_image = bg_removed
            draw_image = color_image
        else:
            draw_image = color_image

        # face detection here
        gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        if gestCatch == False:
            faces = face_cascade.detectMultiScale(gray, 1.1, 2)
            ctr = 0
            idxFace = -1
            minDist = 9999
            if len(faces) > 0:
                # take the face closest to the camera as the reference
                for f in faces:
                    xh, yh, wh, hh = f
                    farea = wh * hh
                    midxf = int(xh + (wh * 0.5))
                    midyf = int(yh + (hh * 0.5))
                    depth_imageS = depth_image * depth_scale
                    deptRef = depth_imageS.item(midyf, midxf)
                    if deptRef <= minDist:
                        idxFace = ctr
                        minDist = deptRef
                    ctr = ctr + 1
                if idxFace >= 0:
                    x, y, w, h = faces[idxFace]

        # crop a region around the face where the gesture is expected
        midx = int(x + (w * 0.5))
        midy = int(y + (h * 0.5))
        xUp = (x - (w * 3))
        yUp = (y - (h * 1.5))
        xDn = ((x + w) + (w * 1))
        yDn = ((y + h) + (h * 2))
        if xUp < 1:
            xUp = 0
        if xDn >= 848:
            xDn = 848
        if yUp < 1:
            yUp = 0
        if yDn >= 480:
            yDn = 480
        roi_gray = gray[int(yUp):int(yDn), int(xUp):int(xDn)]

        if backgroundRemove == True and gestCatch == False:
            # clip the background just behind the detected face
            depth_imageS = depth_image * depth_scale
            deptRef = depth_imageS.item(midy, midx)
            clipping_distance = (deptRef + 0.2) / depth_scale
            boxColor = color_image.copy()

        if delayBol == False and gestStart == True:
            if depthFrame < maxFrames:
                frame = cv2.resize(roi_gray, (img_rows, img_cols))
                framearray.append(frame)
                depthFrame = depthFrame + 1
                txtLoad = txtLoad + "["
                count = count + 1
                gestCatch = True
            if depthFrame == maxFrames:
                dataImg.append(framearray)
                xx = np.array(dataImg).transpose((0, 2, 3, 1))
                X = xx.reshape((xx.shape[0], img_rows, img_cols, maxFrames,
                                channel))
                X = X.astype('float32')

                # ==================== Update the weights ====================
                newWeightI = []
                newWeightA = []
                availableGest = getGesture(currentState, ytFsmTable)
                availG, ignoreG = translateAvailableGest(availableGest,
                                                         classGest)
                if updatedWeight:
                    weightI = manipWeight(weightDense22I, ignoreG, 1000)
                    newWeightI.append(weightDense2)
                    newWeightI.append(weightI)
                if updatedWeight2:
                    maxClass = 10
                    weightA = manipWeight(weightDense22A, availG, 1000)
                    newWeightA.append(weightDense2)
                    newWeightA.append(weightA)
                # ============================================================
                if updatedWeight == False and updatedWeight2 == False:
                    newWeightI.append(weightDense2)
                    newWeightI.append(weightDense22A)
                loaded_model.model.get_layer(name="dense_2").set_weights(
                    newWeightI)

                if handin == True:
                    # do prediction
                    resc = loaded_model.predict_classes(X)[0]
                    res = loaded_model.predict_proba(X)[0]
                    # look up the predicted gesture
                    resultC = classGest[resc]
                    nameResultG = nameGest[resc]
                    for a in range(0, len(res)):
                        print("Gesture {}: {} ".format(str(nameGest[a]),
                                                       str(res[a] * 100)))
                else:
                    resultC = 0
                    nameResultG = "not enough frames recorded"
                print("===========================================================")

                if updatedWeight2:
                    loaded_model.model.get_layer(name="dense_2").set_weights(
                        newWeightA)
                    # do prediction with the alternative weights
                    resc2 = loaded_model.predict_classes(X)[0]
                    res2 = loaded_model.predict_proba(X)[0]
                    resultC2 = classGest[resc2]
                    nameResultG2 = nameGest[resc2]

                if updatedWeight2:
                    if res2[resc2] > res[resc]:
                        txt = "ignored gesture"
                        act = -1
                    else:
                        txt = nameResultG
                        act = resultC
                else:
                    txt = nameResultG
                    act = resultC

                # check with the FSM to find the next state
                currentState = getNextState(currentState, act, ytFsmTable)
                txt = "Gesture-" + str(resultC)

                # reset the capture state and start the inter-gesture delay
                framearray = []
                txtLoad = ""
                depthFrame = 0
                handin = False
                gestCatch = False
                delayBol = True

        if delayBol == True:
            ctrDelay = ctrDelay + 1
            txtDelay = txtDelay + "["
            if ctrDelay == delayGest:
                ctrDelay = 0
                txtDelay = ""
                delayBol = False

        if count == maxFrames:
            K.clear_session()
            pipeline.stop()
            sess.close()
            return act
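# loadFSM, getGesture, and getNextState are used above but not shown in the
# snippet. A minimal sketch of what such an FSM table could look like,
# assuming a plain-text file with one "state gesture next_state" triple per
# line (the file format is purely an assumption):
def loadFSM(path):
    table = {}  # (state, gesture) -> next state
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) == 3:
                state, gest, nxt = parts
                table[(state, gest)] = nxt
    return table


def getGesture(state, table):
    # gestures that are valid transitions out of the given state
    return [g for (s, g) in table if s == str(state)]


def getNextState(state, gesture, table):
    # stay in the current state when the transition is undefined
    return table.get((str(state), str(gesture)), state)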
def get_inference_vector_one_frame_alphabet():
    # model trained based on
    # https://www.kaggle.com/mrgeislinger/asl-rgb-depth-fingerspelling-spelling-it-out
    model = HandShapeFeatureExtractor.get_instance()
    vectors = []
    video_names = []
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    count = 0
    myFiles = glob.glob('data/*.mp4')
    for file in myFiles:
        os.mkdir('{}'.format(file[:-4]))
        cam = cv2.VideoCapture("{}".format(file))
        currentframe = 0
        path = '{}/'.format(file[:-4])
        bug = 0
        while True:
            # reading from frame
            ret, frame = cam.read()
            if ret:
                # if video frames remain, continue creating images
                name = 'frame' + str(currentframe) + '.png'
                # sample roughly one frame out of every 30; `bug` compensates
                # for frames skipped by failed detections
                if (currentframe + bug) % 30 == 0:
                    bug = 0
                    image_np = cv2.rotate(frame,
                                          cv2.ROTATE_90_COUNTERCLOCKWISE)
                    h, w, c = image_np.shape
                    im_width = w
                    im_height = h
                    try:
                        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                        # Actual detection. Variable boxes contains the bounding
                        # box coordinates for hands detected, while scores
                        # contains the confidence for each of these boxes.
                        boxes, scores = detector_utils.detect_objects(
                            image_np, detection_graph, sess)
                        # draw bounding boxes on frame and crop the hand
                        cropped_image = detector_utils.draw_box_on_image(
                            1, 0.2, scores, boxes, im_width, im_height,
                            image_np)
                        img = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2GRAY)
                        results = model.extract_feature(img)
                        results = np.squeeze(results)
                        vectors.append(results)
                        cropped_image = cv2.cvtColor(cropped_image,
                                                     cv2.COLOR_RGB2BGR)
                        cv2.imwrite(path + name, cropped_image)
                    except:
                        bug += 1
                # increase the counter so it records how many frames were read
                currentframe += 1
            else:
                break
        # Release all space and windows once done
        cam.release()
        cv2.destroyAllWindows()
    sess.close()
    return vectors
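# A small sketch of how the returned feature vectors might be used, e.g.
# matching each video's vector against labeled reference vectors by cosine
# similarity (reference_vectors and reference_labels are assumptions):
import numpy as np

def cosine_similarity(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# test_vectors = get_inference_vector_one_frame_alphabet()
# best_label = reference_labels[int(np.argmax(
#     [cosine_similarity(test_vectors[0], r) for r in reference_vectors]))]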
def mono_hand_loop(acq, outSize, config, track=False, paused=False,
                   with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)
    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"],
                                config["output_layer"])

    detection_graph, sess = detector_utils.load_inference_graph()

    left_hand_model = config["model_left"]
    started = True
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]

    print("Entering main Loop.")
    while True:
        print('Estimating pose for frame {}'.format(count))
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs) == 0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute keypoints using the model's initial pose
        points2d = pose_estimator.ba.decodeAndProject(
            pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        score = -1
        result_pose = None
        crop_viz = None

        # STEP 2: detect 2D joints for the detected hand.
        if started:
            if bbox is None:
                bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if bbox is None:
                    cv2.imshow("2D CNN estimation", bgr)
                    cv2.waitKey(1)
                    continue
            else:
                dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if dbox is not None:
                    x, y, w, h = bbox
                    x1, y1, w1, h1 = dbox
                    # replace the tracked bbox when the new detection no
                    # longer overlaps it
                    if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        bbox = dbox
                        print("updated")

            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)
            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale,
                                 scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2]**3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb,
                                        keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)

                if track:
                    result_pose = list(
                        smoothing * np.array(pose_estimator.model.init_pose) +
                        (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the residual; the lower the better, 0 is best,
                # -1 is a failed optimization.
                if track:
                    if -1 < score:  # < 150:
                        pose_estimator.model.init_pose = Core.ParamVector(
                            result_pose)
                    else:
                        print("\n===> Resetting init position for IK <===\n")
                        pose_estimator.model.reset_pose()
                        bbox = None

                if score > -1:
                    # compute result points
                    p2d = np.array(
                        pose_estimator.ba.decodeAndProject(
                            Core.ParamVector(result_pose),
                            clb)).reshape(-1, 2)
                    bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand,
                                                    thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 / (est_ms + ik_ms), 1.0 / est_ms,
                1.0 / ik_ms)
            if with_renderer:
                hand_visualizer.render(pose_estimator.model,
                                       Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox, box_color=(0, 255, 0),
                                   text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1,
                    cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = None
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        p3d = np.array(
            pose_estimator.ba.decode(Core.ParamVector(result_pose),
                                     clb)).reshape(-1, 3)
        count += 1
# likely imports for this snippet (assumed): argparse, xlrd, and the project's
# detector_utils module
import argparse
import xlrd
from datetime import date
from xlutils.copy import copy
import numpy as np

lst1 = []
lst2 = []

ap = argparse.ArgumentParser()
ap.add_argument(
    '-d', '--display', dest='display', type=int, default=1,
    help='Display the detected images using OpenCV. This reduces FPS')
args = vars(ap.parse_args())

model = detector_utils.load_inference_graph()


def save_data(no_of_time_hand_detected, no_of_time_hand_crossed):
    try:
        today = date.today()
        today = str(today)
        # loc = (r'C:\Users\rahul.tripathi\Desktop\result.xls')
        rb = xlrd.open_workbook('result.xls')
        sheet = rb.sheet_by_index(0)
        sheet.cell_value(0, 0)
        # print(sheet.nrows)
        q = sheet.cell_value(sheet.nrows - 1, 1)
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    model = tf.keras.models.load_model('./saved_model2/')

    #################
    # all the following variables store data for the sliding window
    sw_size = 30              # sliding window size
    trajecX = []              # trajectory x coords
    trajecY = []              # trajectory y coords
    preds = []                # classified classes
    preds_freq = np.zeros(5)  # frequency of each class
    #################

    screenWd, screenHt = pyautogui.size()
    frameWd, frameHt = cap_params['im_width'], cap_params['im_height']
    scaleX = 1.0 * screenWd / frameWd
    scaleY = 1.0 * screenHt / frameHt
    state = -1
    rad_in, rad_mid, rad_out = 25, 90, 200  # radii of annulus
    # sanity check so the same swipe is not performed twice consecutively
    last_swipe, swipe_cnt = -1, 0

    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            im_width = cap_params['im_width']
            im_height = cap_params['im_height']
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get cropped image
            cropped_img = detector_utils.get_box_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # draw bounding boxes
            centroidX, centroidY = detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # enable this to visualize only the bounding box and blur the
            # rest of the image:
            # visualize_blurred(cap_params['num_hands_detect'],
            #                   cap_params['score_thresh'], scores, boxes,
            #                   cap_params['im_width'],
            #                   cap_params['im_height'], frame)

            # a negative centroid means no hand was detected
            if centroidX < 0:
                frame_processed += 1
                output_q.put(frame)
                continue

            # update the centroid and add it to the trajectory if needed
            if centroidX > 0:
                if len(trajecX) == 0:
                    trajecX.append(centroidX)
                    trajecY.append(centroidY)
                else:
                    px, py = trajecX[-1], trajecY[-1]
                    d = dist(centroidX, centroidY, px, py)
                    # ignore jitter inside the inner radius and jumps that
                    # land in the outer ring of the annulus
                    if d <= rad_in or (d > rad_mid and d < rad_out):
                        centroidX, centroidY = px, py
                    trajecX.append(centroidX)
                    trajecY.append(centroidY)

            # move the sliding window
            if len(trajecX) > sw_size:
                trajecX = trajecX[-sw_size:]
                trajecY = trajecY[-sw_size:]

            # draw centroid
            if len(trajecX) > 0:
                x, y = trajecX[-1], trajecY[-1]
                detector_utils.draw_centroid_on_image(
                    cap_params['num_hands_detect'], (x, y),
                    cap_params["score_thresh"], scores, frame)

            # static gesture prediction
            if cropped_img is not None:
                cropped_img, idx = classify(cropped_img, model)
                detector_utils.draw_label_on_image(mp[idx], frame)
                preds.append(idx)
                preds_freq[idx] += 1
                if len(preds) > sw_size:
                    preds_freq[preds[0]] -= 1
                    preds = preds[-sw_size:]
                if preds[-1] == 1 or preds[-1] == 3:
                    state = control_vlc(last_swipe, preds[-1], state)

            # control the mouse pointer (optional):
            # if len(trajecX) > 1:
            #     x = int(trajecX[-1] - trajecX[-2])
            #     y = int(trajecY[-1] - trajecY[-2])
            #     control_mouse_pointer(x, y, screenWd, screenHt,
            #                           preds[-1], preds[-2])

            # dynamic gesture prediction
            if len(trajecX) > 0:
                draw_trajectory(trajecX, trajecY, frame)
                p = probab(preds_freq)
                d = dist(trajecX[-1], trajecY[-1], trajecX[0], trajecY[0])
                if p > 0.5 and d > 110:
                    direc = get_direction(trajecX[-1], trajecY[-1],
                                          trajecX[0], trajecY[0])
                    if not last_swipe == direc:
                        swipe_cnt = 0
                    else:
                        swipe_cnt += 1
                    last_swipe = direc
                    if swipe_cnt % 6 == 0 and swipe_cnt > 0:
                        print("swipe {} count: ".format(mp2[last_swipe]),
                              swipe_cnt)
                        state = control_vlc(last_swipe, preds[-1], state)
                    detector_utils.draw_label_on_image(mp2[direc], frame,
                                                       (500, 100))

            frame_processed += 1
            output_q.put(frame)
        else:
            output_q.put(frame)
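# dist() is called above but not defined in the snippet; a plausible
# definition, assuming plain Euclidean distance between two points:
import math

def dist(x1, y1, x2, y2):
    return math.hypot(x1 - x2, y1 - y2)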
def main():
    currentPath = ''
    currentExample = ''

    print('Do you want to : \n 1 - Add new pose \
          \n 2 - Add examples to existing pose \
          \n 3 - Add garbage examples')
    menu_choice = input()
    while menu_choice != '1' and menu_choice != '2' and menu_choice != '3':
        print('Please enter 1, 2 or 3')
        menu_choice = input()

    if menu_choice == '1':
        print('Enter a name for the pose you want to add :')
        name_pose = input()
        # Create folder for pose
        if not os.path.exists('Poses/' + name_pose):
            os.makedirs('Poses/' + name_pose + '/' + name_pose + '_1/')
        currentPath = 'Poses/' + name_pose + '/' + name_pose + '_1/'
        currentExample = name_pose + '_1_'
    elif menu_choice == '2':
        # Display current poses
        dirs = os.listdir('Poses/')
        dirs.sort()
        dirs_choice = ''
        possible_choices = []
        i = 1
        for _dir in dirs:
            dirs_choice += str(i) + ' - ' + str(_dir) + ' / '
            possible_choices.append(str(i))
            i += 1
        # Ask the user which pose to add examples to
        print('Choose one of the following poses:')
        print(dirs_choice)
        choice = input()
        while not choice in possible_choices and \
                dirs[int(choice) - 1] == 'garbage':
            print('Please enter one of the following (not garbage): ' +
                  str(possible_choices))
            choice = input()
        # Count the existing examples to pick the next directory index
        subdirs = os.listdir('Poses/' + dirs[int(choice) - 1] + '/')
        index = len(subdirs) + 1
        # Create new example directory
        if not os.path.exists('Poses/' + dirs[int(choice) - 1] + '/' +
                              dirs[int(choice) - 1] + '_' + str(index) + '/'):
            os.makedirs('Poses/' + dirs[int(choice) - 1] + '/' +
                        dirs[int(choice) - 1] + '_' + str(index) + '/')
        # Update current path
        currentPath = ('Poses/' + dirs[int(choice) - 1] + '/' +
                       dirs[int(choice) - 1] + '_' + str(index) + '/')
        currentExample = dirs[int(choice) - 1] + '_' + str(index) + '_'
        name_pose = dirs[int(choice) - 1]
    elif menu_choice == '3':
        # Create folder for garbage examples
        if not os.path.exists('Poses/Garbage/'):
            os.makedirs('Poses/Garbage/Garbage_1/')
            currentPath = 'Poses/Garbage/Garbage_1/'
            currentExample = 'Garbage_1_'
            name_pose = 'Garbage'
        else:
            # Count the existing examples to pick the next directory index
            subdirs = os.listdir('Poses/Garbage/')
            index = len(subdirs) + 1
            # Create new example directory
            if not os.path.exists('Poses/Garbage/Garbage_' + str(index) + '/'):
                os.makedirs('Poses/Garbage/Garbage_' + str(index) + '/')
            # Update current path
            currentPath = 'Poses/Garbage/Garbage_' + str(index) + '/'
            currentExample = 'Garbage_' + str(index) + '_'
            name_pose = 'Garbage'

    print('You\'ll now be prompted to record the pose you want to add. \n \
          Please place your hand beforehand facing the camera, and press any key when ready. \n \
          When finished press \'q\'.')
    input()

    # Begin capturing
    cap = cv2.VideoCapture(0)
    # Define the codec and create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(currentPath + name_pose + '.avi', fourcc, 25.0,
                          (640, 480))
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # write the frame
            out.write(frame)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    # Release everything once the job is finished
    cap.release()
    out.release()
    cv2.destroyAllWindows()

    vid = cv2.VideoCapture(currentPath + name_pose + '.avi')
    # Check that the recorded video opened correctly
    if not vid.isOpened():
        print('Error opening video stream or file')
        return

    print('>> loading frozen model..')
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    print('>> model loaded!')

    _iter = 1
    # Read until the video is completed
    while vid.isOpened():
        # Capture frame-by-frame
        ret, frame = vid.read()
        if ret:
            print(' Processing frame: ' + str(_iter))
            # Resize and convert to RGB for the NN to work with
            frame = cv2.resize(frame, (320, 180),
                               interpolation=cv2.INTER_AREA)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Detect hands
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # get region of interest
            res = detector_utils.get_box_image(1, 0.2, scores, boxes, 320,
                                               180, frame)
            # Save cropped image
            if res is not None:
                cv2.imwrite(currentPath + currentExample + str(_iter) + '.png',
                            cv2.cvtColor(res, cv2.COLOR_RGB2BGR))
            _iter += 1
        # Break the loop
        else:
            break
    print(' Processed ' + str(_iter) + ' frames!')
    vid.release()
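# Typical entry point for a script like the one above (an assumption; the
# snippet only defines main()):
if __name__ == '__main__':
    main()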
def worker(input_q, output_q, cap_params, frame_processed, poses):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            r"F:\Realtime_Hand_tracking\cnn\models\hand_poses_wGarbage_100.h5")
    except Exception as e:
        print(e)

    detection_centres_x = []
    detection_centres_y = []
    is_centres_filled = False
    detected = False
    index = 0
    detection_area = []
    start_flag = False
    flag_start = pause_time = 0
    sensitivity = gui_sensitivity
    area = centre_x = centre_y = 0
    detection = ""
    direction = ""

    while True:
        predicted_label = ""
        frame = input_q.get()
        if frame is not None:
            frame_processed += 1
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(
                cap_params['num_hands_detect'], cap_params['score_thresh'],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # get bounding box
            if pause_time == 0:
                centre_x, centre_y, area = detector_utils.draw_box_on_image(
                    cap_params['num_hands_detect'],
                    cap_params["score_thresh"], scores, boxes,
                    cap_params['im_width'], cap_params['im_height'], frame)
            if pause_time > 0:
                pause_time -= 1

            # keep a sliding window of the last ten centres and areas
            if is_centres_filled:
                detection_centres_x = detection_centres_x[1:10]
                detection_centres_y = detection_centres_y[1:10]
                detection_area = detection_area[1:10]
                detection_centres_x.append(centre_x)
                detection_centres_y.append(centre_y)
                detection_area.append(area)
            else:
                detection_centres_x.append(centre_x)
                detection_centres_y.append(centre_y)
                detection_area.append(area)

            if pause_time == 0:
                index += 1
            if index >= sensitivity:
                index = 0
                is_centres_filled = True
            if index == 0:
                predicted_label = classify(res, model, classification_graph,
                                           session, poses)
                if predicted_label == "Start" and flag_start == 0:
                    detection = "Start tracking"
                    start_flag = True
                    flag_start = 1

            # after a gesture fires, reset the window and pause tracking
            if detected:
                detection_centres_x = []
                detection_centres_y = []
                is_centres_filled = False
                index = 0
                detected = False
                detection_area = []
                frame_processed = 0
                pause_time = 30

            centres_x = [v for v in detection_centres_x.copy() if v]
            centres_y = [v for v in detection_centres_y.copy() if v]
            areas = [a for a in detection_area.copy() if a]

            # derive swipe direction from the motion of the box centre
            if (len(centres_x) > 3 and is_centres_filled and
                    len(centres_y) > 3 and len(areas) > 3 and start_flag):
                flag = 0
                dX = centres_x[-1] - centres_x[0]
                dY = centres_y[-1] - centres_y[0]
                if dX > 20 and dY > 20:
                    m = dY / dX
                    angle = math.degrees(math.atan(m))
                    if angle < 45:
                        flag = 1
                    elif angle > 45:
                        flag = 2
                if dX > 100 and (abs(dY) < 20 or flag == 1):
                    direction = "Right"
                    keyboard.press_and_release('right')
                    detected = True
                elif -dX > 100 and (abs(dY) < 20 or flag == 1):
                    direction = "Left"
                    keyboard.press_and_release('left')
                    detected = True
                elif dY > 50 and (abs(dX) < 10 or flag == 2):
                    direction = "Down"
                    detected = True
                elif -dY > 50 and (abs(dX) < 10 or flag == 2):
                    direction = "Up"
                    detected = True
                elif (areas[-1] - 3000 > areas[0] and abs(dX) < 30 and
                      abs(dY) < 20):
                    direction = "Zoom in"
                    detected = True
                elif (areas[-1] < areas[0] - 3000 and abs(dX) < 10 and
                      abs(dY) < 20):
                    direction = "Zoom out"
                    detected = True

            output_q.put((frame, direction, predicted_label))
    sess.close()
    for rank, rect in enumerate(hands):
        detect_count += 1
        iou = calcIoU(rect, frame)
        if iou > max_iou:
            max_iou = iou
    if max_iou > iou_threshod:
        pass_count += 1
    endt = time.time()
    print("No.{} Handle {} images and elapsed_time is {:.2f}s, detect {} and pass {}."
          .format(int(50 * score_thresh), len(label_dict), (endt - start),
                  detect_count, pass_count))
    return pass_count, detect_count, scan_hand


if __name__ == '__main__':
    to_csv = '/home/zgwu/HandImages/long_video/test_frames/images.csv'
    beyond_iou_threshod = []
    local_label_dict = read_test_csv_from(to_csv)
    # SSD graph used to detect hands
    local_detection_graph, local_sessd = detector_utils.load_inference_graph()
    for i in range(50):
        score_th = float(i / 50)
        pass_cnt, detect_cnt, scan_hand = record_tracker(
            local_detection_graph, local_sessd, local_label_dict, score_th)
        beyond_iou_threshod.append(
            (pass_cnt / scan_hand, detect_cnt / scan_hand, scan_hand / 1404))
    for i in range(len(beyond_iou_threshod)):
        x, y, z = beyond_iou_threshod[i]
        print(x, y, z, sep=' ')
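# calcIoU is used above but not shown. A standard intersection-over-union,
# assuming both arguments are (left, top, right, bottom) boxes (the detected
# rect and the ground-truth box for the frame):
def calcIoU(box_a, box_b):
    ax1, ay1, ax2, ay2 = box_a
    bx1, by1, bx2, by2 = box_b
    ix1, iy1 = max(ax1, bx1), max(ay1, by1)
    ix2, iy2 = min(ax2, bx2), min(ay2, by2)
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    union = ((ax2 - ax1) * (ay2 - ay1) +
             (bx2 - bx1) * (by2 - by1) - inter)
    return inter / union if union > 0 else 0.0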
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    prev_areas = []
    prev_toplefts = []
    frame_count = 0
    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # Actual detection. Variable boxes contains the bounding box
            # coordinates for hands detected, while scores contains the
            # confidence for each of these boxes.
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)
            toplefts, bottomrights, areas = detector_utils.get_corners(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'])

            if frame_count >= 2:
                if len(prev_areas) == 0 or len(areas) == 0:
                    pass
                else:
                    # match each current box to the closest previous box
                    dist = {}
                    for current_index in range(len(toplefts)):
                        for prev_index in range(len(prev_toplefts)):
                            dist[point_distance(
                                toplefts[current_index],
                                prev_toplefts[prev_index])] = (current_index,
                                                               prev_index)
                    current_index, prev_index = dist[min(dist.keys())]
                    current_area = areas[current_index]
                    prev_area = prev_areas[prev_index]
                    if compare_areas(current_area, prev_area, 2):
                        print("current:", current_area)
                        print("prev:", prev_area)
                        splatters.append(
                            Splatter(toplefts[current_index],
                                     bottomrights[current_index]))
                        frame_count = 0
                    if len(toplefts) == 2 and len(prev_toplefts) == 2:
                        if compare_areas(areas[1 - current_index],
                                         prev_areas[1 - prev_index], 2):
                            print("current:", current_area)
                            print("prev:", prev_area)
                            splatters.append(
                                Splatter(toplefts[1 - current_index],
                                         bottomrights[1 - current_index]))
                            frame_count = 0
            else:
                frame_count += 1

            # overlay each splatter on the frame, fading it out over time
            for splotch in splatters:
                if splotch.opacity == 0:
                    splatters.remove(splotch)
                    continue
                roi = frame[splotch.topleft[1]:splotch.bottomright[1],
                            splotch.topleft[0]:splotch.bottomright[0]]
                background = roi.copy()
                overlap = roi.copy()
                background[splotch.outline[:, :, 3] != 0] = (0, 0, 0)
                overlap[splotch.outline[:, :, 3] == 0] = (0, 0, 0)
                overlap_area = cv2.addWeighted(overlap, 1 - splotch.opacity,
                                               splotch.outline[:, :, 0:3],
                                               splotch.opacity, 0)
                dst = cv2.add(overlap_area, background)
                frame[splotch.topleft[1]:splotch.bottomright[1],
                      splotch.topleft[0]:splotch.bottomright[0]] = dst
                splotch.fade()

            prev_areas = areas.copy()
            prev_toplefts = toplefts.copy()
            # add the frame with splatters to the queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
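# point_distance and compare_areas are used above but not defined in the
# snippet; plausible definitions, assuming Euclidean distance between (x, y)
# points and a ratio test on the two box areas:
import math

def point_distance(p1, p2):
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])

def compare_areas(current_area, prev_area, ratio):
    # true when one area has grown or shrunk by more than `ratio` times
    return (current_area > prev_area * ratio or
            prev_area > current_area * ratio)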
# likely imports for this snippet (assumed): argparse, xlrd, and the project's
# detector_utils module
import argparse
import xlrd
from datetime import date
from xlutils.copy import copy
import numpy as np

lst1 = []
lst2 = []

ap = argparse.ArgumentParser()
ap.add_argument(
    '-d', '--display', dest='display', type=int, default=1,
    help='Display the detected images using OpenCV. This reduces FPS')
args = vars(ap.parse_args())

detection_graph_m, sess_m = detector_utils.load_inference_graph(
    detector_utils.PATH_TO_CKPT_M)


def save_data(no_of_time_hand_detected, no_of_time_hand_crossed):
    try:
        today = date.today()
        today = str(today)
        # loc = (r'C:\Users\rahul.tripathi\Desktop\result.xls')
        rb = xlrd.open_workbook('result.xls')
        sheet = rb.sheet_by_index(0)
        sheet.cell_value(0, 0)
        # print(sheet.nrows)
        q = sheet.cell_value(sheet.nrows - 1, 1)
def worker(input_q, output_q, cropped_output_q, inferences_q,
           landmark_output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        "landmarks/shape_predictor_68_face_landmarks.dat")

    print(">> loading keras model for worker")
    try:
        model = tf.keras.models.load_model('models/asl_char_model.h5',
                                           compile=False)
    except Exception as e:
        print(e)

    while True:
        frame = input_q.get()
        if frame is not None:
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # draw bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # classify hand
            if res is not None:
                class_res = ""
                try:
                    proba = model.predict(process_image(res))[0]
                    mx = np.argmax(proba)
                    score = proba[mx] * 100
                    sequence = categories[mx][1]
                    class_res = str(score) + "/" + sequence
                except:
                    score = 0.0
                    sequence = ""
                    class_res = "empty"
                inferences_q.put(class_res)

            image_np1 = imutils.resize(frame, width=400)
            gray = cv2.cvtColor(image_np1, cv2.COLOR_BGR2GRAY)

            # landmarking: detect faces in the grayscale frame
            rects = detector(gray, 0)
            # loop over the face detections
            for rect in rects:
                # determine the facial landmarks for the face region, then
                # convert the (x, y)-coordinates to a NumPy array
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                # draw each facial landmark on the image
                for (x, y) in shape:
                    cv2.circle(image_np1, (x, y), 1, (0, 0, 255), -1)

            # add annotated frames to the queues
            landmark_output_q.put(image_np1)
            cropped_output_q.put(res)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
def worker(input_q, output_q, cap_params, frame_processed, movement,
           movement_threshold):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    old_centers = [None] * cap_params["num_hands_detect"]
    centers = [None] * cap_params["num_hands_detect"]
    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # Actual detection. Variable boxes contains the bounding box
            # coordinates for hands detected, while scores contains the
            # confidence for each of these boxes.
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            if any(centers):
                # If both hands were detected in both frames, average the
                # movement under the two possible hand pairings and keep the
                # smaller one
                if all(old_centers) and all(centers):
                    moved1 = calculate_movement(old_centers[0], centers[0])
                    moved1 += calculate_movement(old_centers[1], centers[1])
                    moved1 = moved1 / 2
                    moved2 = calculate_movement(old_centers[1], centers[0])
                    moved2 += calculate_movement(old_centers[0], centers[1])
                    moved2 = moved2 / 2
                    moved = min(moved1, moved2)
                else:
                    # Try to match the hand movement to the closest hand
                    # position from the previous frame
                    moved1 = calculate_movement(old_centers[0], centers[0])
                    if old_centers[1]:
                        moved2 = calculate_movement(old_centers[1], centers[0])
                    else:
                        moved2 = 99999
                    if centers[1]:
                        moved3 = calculate_movement(old_centers[0], centers[1])
                    else:
                        moved3 = 99999
                    moved = min(moved1, moved2, moved3)
                if moved > movement_threshold:
                    # moved = 0
                    pass
                with movement.get_lock():
                    movement.value += int(moved)

            old_centers = centers
            # draw bounding boxes
            centers = detector_utils.draw_box_on_image(
                cap_params["num_hands_detect"], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'],
                cap_params['im_height'], frame)

            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
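# calculate_movement is used above but not defined in the snippet; a plausible
# definition, assuming each center is an (x, y) tuple and movement is the
# Euclidean distance between the old and new positions:
import math

def calculate_movement(old_center, new_center):
    return math.hypot(new_center[0] - old_center[0],
                      new_center[1] - old_center[1])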
def run(self):
    detection_graph, sess = detector_utils.load_inference_graph()

    # parameters for loading data and images
    detection_model_path = 'haarcascade_frontalface_default.xml'
    emotion_model_path = 'fer2013_mini_XCEPTION.102-0.66.hdf5'
    emotion_labels = get_labels('fer2013')

    # hyper-parameters for bounding box shape
    frame_window = 10
    emotion_offsets = (20, 40)

    # loading models
    face_detection = load_detection_model(detection_model_path)
    emotion_classifier = load_model(emotion_model_path, compile=False)

    # getting input model shapes for inference
    emotion_target_size = emotion_classifier.input_shape[1:3]

    # starting lists for calculating modes
    emotion_window = []

    start_time = datetime.datetime.now()
    im_width, im_height = (400, 350)
    # max number of hands we want to detect/track; this can be scaled up
    num_hands_detect = 2
    min_threshold = 0.2
    old_points = [None] * num_hands_detect

    cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL)

    while True:
        if self.image_queue.empty():
            continue
        img_data = base64.b64decode(str(self.image_queue.get()))
        image_np = np.asarray(Image.open(io.BytesIO(img_data)))
        image_np = cv2.flip(image_np, 1)
        gray_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
        faces = detect_faces(face_detection, gray_image)

        # Actual detection. Variable boxes contains the bounding box
        # coordinates for hands detected, while scores contains the
        # confidence for each of these boxes.
        boxes, scores = detector_utils.detect_objects(image_np,
                                                      detection_graph, sess)

        # only measure displacement when every tracked hand is visible
        valid_hands = 0
        calc_displacement = False
        for score in scores:
            if score > min_threshold:
                valid_hands += 1
        if valid_hands == num_hands_detect:
            calc_displacement = True

        # draw bounding boxes on the frame; 0.2 is the min threshold
        self.total_displacement += detector_utils.draw_box_on_image(
            num_hands_detect, min_threshold, scores, boxes, im_width,
            im_height, image_np, old_points, calc_displacement)

        # Calculate frames per second (FPS)
        self.num_frames += 1
        elapsed_time = (datetime.datetime.now() - start_time).total_seconds()
        fps = self.num_frames / elapsed_time

        if self.current_emotion in self.emotions:
            self.emotions[self.current_emotion] += 1
        else:
            self.emotions[self.current_emotion] = 1
        print(self.total_displacement / (10 * self.num_frames),
              self.current_emotion, self.emotions)

        for face_coordinates in faces:
            x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
            gray_face = gray_image[y1:y2, x1:x2]
            try:
                gray_face = cv2.resize(gray_face, (emotion_target_size))
            except:
                continue
            gray_face = preprocess_input(gray_face, True)
            gray_face = np.expand_dims(gray_face, 0)
            gray_face = np.expand_dims(gray_face, -1)
            emotion_prediction = emotion_classifier.predict(gray_face)
            emotion_probability = np.max(emotion_prediction)
            emotion_label_arg = np.argmax(emotion_prediction)
            self.current_emotion = emotion_labels[emotion_label_arg]
            emotion_window.append(self.current_emotion)
            if len(emotion_window) > frame_window:
                emotion_window.pop(0)
            try:
                emotion_mode = mode(emotion_window)
            except:
                continue
            draw_bounding_box(face_coordinates, image_np)

        # Display FPS on frame
        detector_utils.draw_fps_on_image("FPS : " + str(int(fps)), image_np)
        cv2.imshow('Single-Threaded Detection',
                   cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cv2.destroyAllWindows()
            break
def __init__(self):
    '''
    Parameters
    '''
    parser = argparse.ArgumentParser(
        description='TensorFlow Object Detection',
        usage='''Default parameters: None''')
    parser.add_argument('-g', '--garlic', action='store_true',
                        help='Sets up Network for Bin Dropper')
    parser.add_argument('-v', '--vamp', action='store_true',
                        help='Sets up Network for the Buoy')
    parser.add_argument('-s', '--stake', action='store_true',
                        help='Sets up Network for the Torpedo Board')
    args = parser.parse_args()

    self.tf_listener = tf.TransformListener()
    self.garlic = args.garlic
    self.vamp = args.vamp
    self.stake = args.stake

    if self.garlic:
        self.target = 'garlic'
        self.classes = 1
        self.see_sub = mil_tools.Image_Subscriber(
            topic="/camera/down/image_raw", callback=self.img_callback)
    elif self.vamp:
        self.target = 'vamp'
        self.classes = 4
        self.see_sub = mil_tools.Image_Subscriber(
            topic="/camera/front/left/image_raw",
            callback=self.img_callback)
    else:
        self.target = 'stake'
        self.classes = 1
        self.see_sub = mil_tools.Image_Subscriber(
            topic="/camera/front/left/image_raw",
            callback=self.img_callback)

    # Number of frames
    self.num_frames = rospy.get_param('~num_frames', 0)
    # Number of objects we detect
    self.num_objects_detect = rospy.get_param('~objects_detected', 1)
    # Minimum confidence score for the detections
    self.score_thresh = rospy.get_param('~score_thresh', 0.99)
    # Whether we want debug images published or not
    self.debug = rospy.get_param('~debug', True)
    # Camera image width and height
    self.im_width = 1920
    self.im_height = 1080
    # Current time
    self.start_time = datetime.datetime.now()

    '''
    Misc Utils, Image Subscriber, and Service Call.
    '''
    # CV bridge for converting from ROS messages to cv images
    self.bridge = CvBridge()
    self.inference_graph, self.sess = detector_utils.load_inference_graph(
        self.target, self.classes)

    # Subscribes to our image topic, allowing us to process the images
    # self.sub1 = rospy.Subscriber(
    #     self.camera_topic, Image, self.img_callback, queue_size=1)
    self.see_frame_counter = 0
    self.see_camera_info = self.see_sub.wait_for_camera_info()
    self.see_img_geom = image_geometry.PinholeCameraModel()
    self.see_img_geom.fromCameraInfo(self.see_camera_info)

    '''
    Publishers:
    debug_image_pub: publishes the images showing what tensorflow has
        identified as path markers
    bbox_pub: publishes the bounding boxes.
    '''
    self.debug_image_pub = rospy.Publisher('path_debug', Image, queue_size=1)
    self.bbox_pub = rospy.Publisher('bbox_pub', Point, queue_size=1)
    self.roi_pub = rospy.Publisher('roi_pub', RegionOfInterest, queue_size=1)
    self.dictionary = {}
def detector():
    ##
    # Game params
    #
    HEIGHT = 720
    paddle_left_y = 360
    paddle_right_y = 360
    PADDLE_HEIGHT = 150

    ##
    # Hand detector setup
    #
    score_thresh = 0.7
    fps = 10
    video_source = 0
    width = 640
    height = 480
    display = 1

    cap = cv2.VideoCapture(video_source)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    im_width, im_height = (cap.get(3), cap.get(4))

    detection_graph, sess = detector_utils.load_inference_graph()

    # Max number of hands we want to detect/track
    num_hands_detect = 2
    wd = 640
    ht = 480

    cv2.namedWindow('Left Hand', cv2.WINDOW_NORMAL)
    cv2.namedWindow('Right Hand', cv2.WINDOW_NORMAL)

    while True:
        ret, image_np = cap.read()
        image_np = cv2.resize(image_np, (wd, ht))
        image_np = cv2.flip(image_np, 1)
        try:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        except Exception:
            print("Error converting to RGB")

        # Split the frame into a left half and a right half, one per paddle
        width_cutoff = int(wd / 2)
        image_l = image_np[:, :width_cutoff]
        image_r = image_np[:, width_cutoff:]
        w, h = int(wd / 2), ht

        boxesl, scoresl = detector_utils.detect_objects(
            image_l, detection_graph, sess)
        detector_utils.draw_box_on_image(num_hands_detect, score_thresh,
                                         scoresl, boxesl, w, h, image_l)
        boxesr, scoresr = detector_utils.detect_objects(
            image_r, detection_graph, sess)
        detector_utils.draw_box_on_image(num_hands_detect, score_thresh,
                                         scoresr, boxesr, w, h, image_r)

        cv2.imshow('Left Hand', cv2.cvtColor(image_l, cv2.COLOR_RGB2BGR))
        cv2.imshow('Right Hand', cv2.cvtColor(image_r, cv2.COLOR_RGB2BGR))

        if cv2.waitKey(25) & 0xFF == 27:
            cv2.destroyAllWindows()
            client.close()  # module-level socket used by send_paddle_coordinates
            break

        for i in range(num_hands_detect):
            if scoresl[i] > score_thresh:
                (left, right, top, bottom) = (boxesl[i][1] * w, boxesl[i][3] * w,
                                              boxesl[i][0] * h, boxesl[i][2] * h)
                # paddle_left_y = int(((top + bottom) / 2) * (HEIGHT / ht))
                paddle_left_y = map_val(top, bottom)
                if paddle_left_y < 0:
                    paddle_left_y = 0
                if paddle_left_y > (HEIGHT - PADDLE_HEIGHT / 2):
                    paddle_left_y = HEIGHT - PADDLE_HEIGHT / 2
                try:
                    print('Sending left paddle coordinates...')
                    send_paddle_coordinates('left', paddle_left_y)
                    print('Sent!')
                except Exception:
                    pass
                # print(int((top + bottom) / 2 * (HEIGHT / ht)))

            if scoresr[i] > score_thresh:
                (left, right, top, bottom) = (boxesr[i][1] * w, boxesr[i][3] * w,
                                              boxesr[i][0] * h, boxesr[i][2] * h)
                # paddle_right_y = int(((top + bottom) / 2) * (HEIGHT / ht))
                paddle_right_y = map_val(top, bottom)
                if paddle_right_y > (HEIGHT - PADDLE_HEIGHT / 2):
                    paddle_right_y = HEIGHT - PADDLE_HEIGHT / 2
                if paddle_right_y < 0:
                    paddle_right_y = 0
                try:
                    print('Sending right paddle coordinates...')
                    send_paddle_coordinates('right', paddle_right_y)
                    print('Sent!')
                except Exception:
                    pass
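map_val, send_paddle_coordinates, and the module-level client socket are called above but defined elsewhere in the file. A minimal sketch consistent with how they are called, with names kept and bodies assumed (the JSON wire format is purely illustrative; the commented-out line in the loop suggests map_val is the midpoint rescaling shown here):

```python
import json
import socket

# Assumed module-level socket, connected to the game server at startup
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

def map_val(top, bottom, ht=480, HEIGHT=720):
    """Map the detected box's vertical midpoint from capture space to game space."""
    return int(((top + bottom) / 2) * (HEIGHT / ht))

def send_paddle_coordinates(side, y):
    """Ship one paddle update to the game server as a JSON line."""
    client.sendall((json.dumps({'side': side, 'y': int(y)}) + '\n').encode('utf-8'))
```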
def run_test(read_csv, logger):
    img_folder = '/home/zgwu/HandImages/long_video/test_frames/'
    save_folder = '/home/zgwu/HandImages/long_video/double_frames/'
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    label_dict = read_test_csv_from(read_csv)

    # The placeholder holds an input tensor for classification
    input_images = tf.placeholder(dtype=tf.float32,
                                  shape=[None, 224, 224, 3],
                                  name='input')
    if ModelType == 'mobilenet':
        out, end_points = mobilenet_v2.mobilenet(input_tensor=input_images,
                                                 num_classes=num_classes,
                                                 depth_multiplier=1.0,
                                                 is_training=False)
    else:
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            out, end_points = inception_v3.inception_v3(
                inputs=input_images, num_classes=num_classes, is_training=False)

    detection_graph, sessd = detector_utils.load_inference_graph()  # SSD to detect hands
    saver = tf.train.Saver()
    sess = tf.Session()
    saver.restore(sess, CKPT)

    CM = [[0] * 7 for _ in range(7)]  # 7x7 confusion matrix
    D = {'num_pictures': 0, 'acc': 0, 'num_hands': 0,
         'num_frames': 0,  # counter used when saving crops with the 's' key
         'detect_t': 0.0, 'classify_t': 0.0,
         'o': [0, 0, 0, 0, 0, 0, 0],
         'd': [0, 0, 0, 0, 0, 0, 0],
         'r': [0, 0, 0, 0, 0, 0, 0],
         'tp': [0, 0, 0, 0, 0, 0, 0]}
    tot_count = 0
    y_true, y_pred = [], []
    label_matrix = np.empty((0, num_classes), dtype=int)
    score_matrix = np.empty((0, num_classes), dtype=float)  # prediction scores are floats

    for num_img, (img_name, frame_label) in enumerate(label_dict.items()):
        tot_count += 1
        if tot_count % 100 == 1:
            print('Process {} --- {} / {}'.format(img_name, tot_count, len(label_dict)))
        l, t, r, b, clazz = frame_label
        acc_index = labelsDict[clazz]
        D['o'][acc_index] += 1  # confusion matrix
        # filename = os.path.basename(img_name)
        name, ext = os.path.splitext(img_name)
        # print("Processing the image : " + name + " ... {}/{}".format(num_img + 1, len(label_dict)))
        key = cv2.waitKey(5) & 0xff  # use the Esc key to close the program
        if key == 27:
            break
        if key == ord('p'):
            cv2.waitKey(0)

        image_raw = cv2.imread(os.path.join(img_folder, img_name))
        image_np = cv2.resize(image_raw, (im_width, im_height))
        try:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        except Exception:
            print("Error converting to RGB")

        start = time.time()  # start timing
        boxes, scores = detector_utils.detect_objects(image_np, detection_graph, sessd)
        hands = from_image_crop_boxes(num_hands_detect, score_thresh, scores,
                                      boxes, im_width, im_height, 0.5)
        endh = time.time()  # hand detection done
        detect_t = endh - start
        D['detect_t'] += detect_t
        if not hands:
            continue
        else:
            D['num_pictures'] += 1
            D['d'][acc_index] += 1

        for rank, rect in enumerate(hands):
            D['num_hands'] += 1
            left, top, right, bottom = rect
            region = image_np[top:bottom, left:right]
            region = cv2.resize(region, (224, 224), cv2.INTER_AREA)
            # feed = np.expand_dims(region, axis=0)  # maybe it's a wrong format to feed
            img_data = tf.image.convert_image_dtype(np.array(region)[:, :, 0:3],
                                                    dtype=tf.float32)  # RGB, elements in [0, 1)
            resized_img = tf.image.resize_images(img_data, size=[224, 224], method=0)
            img = resized_img.eval(session=sess)
            img.resize([1, 224, 224, 3])
            # Feed the image array in and run inference to get the predictions
            predictions = end_points['Predictions'].eval(
                session=sess, feed_dict={input_images: img})
            label = np.zeros((1, num_classes), dtype=int)
            label[0, acc_index] = 1
            label_matrix = np.append(label_matrix, label, axis=0)
            score_matrix = np.append(score_matrix,
                                     predictions.reshape([1, num_classes]), axis=0)
            # print(label, predictions.reshape([1, num_classes]))
            predictions.resize([num_classes])
            np.set_printoptions(precision=4, suppress=True)
            index = int(np.argmax(predictions))
            y_true.append(acc_index)
            y_pred.append(index)
            D['r'][index] += 1
            msg = img_name + ' ' + clazz + ' ' + labelsStr[index]
            CM[acc_index][index] += 1
            if index == acc_index:
                D['acc'] += 1
                D['tp'][index] += 1
            logger.info(msg)
            if key == ord('s'):
                region = cv2.cvtColor(region, cv2.COLOR_RGB2BGR)
                cv2.imwrite(frame_path + name + '_' + str(D['num_frames']) + '_'
                            + str(rank) + '.jpg', region)
                cv2.waitKey(0)

        endr = time.time()
        classify_t = endr - endh
        D['classify_t'] += classify_t

    print("From {} pictures, we detect {} hands with {} accurate predictions ({:.2f})"
          .format(tot_count, D['num_hands'], D['acc'], D['acc'] / D['num_hands']))
    result_log = '\n@@images_count: {} and detect_count: {}'.format(tot_count, D['num_pictures']) + \
        '\n@@image_size: (width: {}, height: {})'.format(im_width, im_height) + \
        '\n@@num_hand_detect: {} - {}%'.format(D['num_hands'], int(100 * D['num_hands'] / tot_count)) + \
        '\n@@each_elapsed_time: (detect_hands: {:.4f}s, classify_hand: {:.4f}s)'.format(
            D['detect_t'] / tot_count, D['classify_t'] / D['num_hands']) + \
        '\n@@classify_result: Fist  Admire  Victory  Okay  None  Palm  Six' + \
        '\n   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- origin classes' \
        '\n   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- detect classes' \
        '\n   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- recognize count' \
        '\n   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- true positive' \
        .format(D['o'][0], D['o'][1], D['o'][2], D['o'][3], D['o'][4], D['o'][5], D['o'][6],
                D['d'][0], D['d'][1], D['d'][2], D['d'][3], D['d'][4], D['d'][5], D['d'][6],
                D['r'][0], D['r'][1], D['r'][2], D['r'][3], D['r'][4], D['r'][5], D['r'][6],
                D['tp'][0], D['tp'][1], D['tp'][2], D['tp'][3], D['tp'][4], D['tp'][5], D['tp'][6]) + \
        '\n@@accuracy: {}/{} - {}%'.format(D['acc'], D['num_hands'],
                                           int(100 * D['acc'] / D['num_hands'])) + \
        '\n' + '-' * 100 + \
        '\n' + str(CM)
    # print(result_log)
    logger.info(result_log)
    # print(classification_report(y_true, y_pred, target_names=labelsStr, digits=3))
    logger.info(str(classification_report(y_true, y_pred,
                                          target_names=labelsStr, digits=3)))
    print(label_matrix.shape, score_matrix.shape)

    # Compute the ROC for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    # Compute the micro-average ROC curve and ROC area (method 2)
    fpr["micro"], tpr["micro"], _ = roc_curve(label_matrix.ravel(),
                                              score_matrix.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    # FPR is the x axis, TPR the y axis
    plt.plot(fpr["micro"], tpr["micro"], c='r', lw=2, alpha=0.7,
             label=u'AUC=%.3f' % roc_auc["micro"])
    plt.plot((0, 1), (0, 1), c='#808080', lw=1, ls='--', alpha=0.7)
    plt.xlim((-0.01, 1.02))
    plt.ylim((-0.01, 1.02))
    plt.xticks(np.arange(0, 1.1, 0.1))
    plt.yticks(np.arange(0, 1.1, 0.1))
    plt.xlabel('False Positive Rate', fontsize=13)
    plt.ylabel('True Positive Rate', fontsize=13)
    plt.grid(b=True, ls=':')
    plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
    plt.title(u'The ROC and AUC of the MobileNet Classifier.', fontsize=17)
    plt.show()
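The comment above announces a per-class ROC, but the function only plots the micro-average curve. Continuing from the fpr/tpr/roc_auc dicts and the label/score matrices already built, a sketch of the per-class loop it presumably intended, using the same sklearn roc_curve and auc calls already imported:

```python
# Sketch: one ROC curve per class, added to the same axes before plt.show()
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(label_matrix[:, i], score_matrix[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], lw=1, alpha=0.7,
             label=u'%s AUC=%.3f' % (labelsStr[i], roc_auc[i]))
```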
def worker(input_q, output_q, cropped_output_q, inferences_q, pointX_q, pointY_q,
           cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            "cnn/models/hand_poses_wGarbage_10.h5")
    except Exception as e:
        print(e)

    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if frame is not None:
            # Actual detection: boxes contains the bounding box coordinates of the
            # detected hands, scores the confidence for each of these boxes.
            # Hint: if len(boxes) > 1 you may assume you have found at least one
            # hand (within your score threshold).
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # Get the region of interest; read the frame's image size first
            cropped_height, cropped_width, _ = frame.shape[:3]
            res = detector_utils.get_box_image(cap_params['num_hands_detect'],
                                               cap_params["score_thresh"],
                                               scores, boxes,
                                               cropped_width, cropped_height,
                                               frame)

            # When the hand score exceeds the threshold
            if scores[0] > cap_params["score_thresh"]:
                (left, right, top, bottom) = (boxes[0][1] * cropped_width,
                                              boxes[0][3] * cropped_width,
                                              boxes[0][0] * cropped_height,
                                              boxes[0][2] * cropped_height)
                # Get the screen size
                width, height = autopy.screen.size()
                # Screen-to-frame scale factors
                # wx = (width + (int(right) - int(left)) * (width / cropped_width)) / (cropped_width - 20)
                # hx = (height + (int(bottom) - int(top)) * (height / cropped_height)) / (cropped_height - 20)
                # p1 = int(left) * wx
                # p2 = int(bottom) * hx - (int(bottom) * hx - int(top) * hx)
                hx = (height / cropped_height) * 1.3
                wx = (width / cropped_width) * 1.3

                # Stabilise the detected hand box to a fixed size
                hi = 100 - (int(bottom) - int(top))
                wi = 70 - (int(right) - int(left))
                top = top - hi
                left = left - wi
                fp = (int(left), int(top))
                ep = (int(right), int(bottom))
                p1 = int(left) * wx
                p2 = int(top) * hx  # vertical axis uses the height scale (original multiplied by wx, likely a typo)
                if abs(hi) > 25 or abs(wi) > 25:
                    # Draw the detected hand region
                    cv2.rectangle(frame, fp, ep, (255, 0, 0), 1, 1)
                else:
                    cv2.rectangle(frame, fp, ep, (77, 255, 9), 1, 1)
                # Queue the captured coordinates (p1, p2)
                pointX_q.put(p1)
                pointY_q.put(p2)

            # Classify the hand pose
            if res is not None:
                class_res = classifier.classify(model, classification_graph,
                                                session, res)
                inferences_q.put(class_res)

            # Add the frame annotated with the bounding box to the queue
            cropped_output_q.put(res)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
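The pointX_q/pointY_q queues filled by this worker are drained by a consumer that is not shown here. A minimal sketch of what that consumer could look like with autopy, whose screen and mouse APIs the worker already uses; the function name and loop structure are assumptions.

```python
import autopy

def pointer_consumer(pointX_q, pointY_q):
    """Hypothetical consumer: move the cursor to the latest queued hand position."""
    screen_w, screen_h = autopy.screen.size()
    while True:
        x = pointX_q.get()
        y = pointY_q.get()
        # Clamp to the screen so autopy does not raise on out-of-bounds moves
        x = min(max(x, 0), screen_w - 1)
        y = min(max(y, 0), screen_h - 1)
        autopy.mouse.move(x, y)
```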
def detect():
    global sess
    global label_lines
    global softmax_tensor
    global res
    global score
    global frame
    global fist
    global two
    global three
    global four
    global five
    global status
    global sfive
    global path

    color = (0, 255, 0)
    label_lines = [line.rstrip() for line in tf.gfile.GFile("output_labels.txt")]

    # Unpersist the classification graph from file
    with tf.gfile.FastGFile("output_graph.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')

    detection_graph, sessD = detector_utils.load_inference_graph()

    fist = 0
    two = 0
    three = 0
    four = 0
    five = 0
    sfive = 0
    status = 'command'
    path = ''

    with tf.Session() as sess:
        score_thresh = 0.60
        softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')

        # Get stream from webcam and set parameters
        vs = VideoStream().start()

        # Max number of hands we want to detect/track
        num_hands_detect = 1
        im_height, im_width = (None, None)

        try:
            while True:
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    vs.stop()
                    break

                # Read frame and process
                frame = vs.read()
                frame = cv2.flip(frame, 1)
                frame = cv2.resize(frame, (640, 480))
                if im_height is None:
                    im_height, im_width = frame.shape[:2]

                # Convert image to RGB since OpenCV loads images in BGR;
                # otherwise accuracy will decrease
                try:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                except Exception:
                    print("Error converting to RGB")

                # Run the image through the TensorFlow graph
                boxes, scores, classes = detector_utils.detect_objects(
                    frame, detection_graph, sessD)

                for i in range(num_hands_detect):
                    if scores[i] > score_thresh:
                        (left, right, top, bottom) = (boxes[i][1] * im_width,
                                                      boxes[i][3] * im_width,
                                                      boxes[i][0] * im_height,
                                                      boxes[i][2] * im_height)
                        height = bottom - top
                        width = right - left
                        img_cropped = frame[int(top):int(top + height),
                                            int(left):int(left + width)]
                        img_cropped = cv2.cvtColor(img_cropped, cv2.COLOR_RGB2BGR)
                        image_data = cv2.imencode('.jpg', img_cropped)[1].tostring()
                        res, score = predict(image_data)

                        # putText args: frame, text, coordinates, font type,
                        # font size, font color, font thickness
                        cv2.putText(frame, '%s' % (res.upper()), (100, 400),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 4)
                        cv2.putText(frame, '(score = %.5f)' % (float(score)), (100, 450),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)
                        if classes[i] == 1:
                            hlabel = 'open'
                        if classes[i] == 2:
                            hlabel = 'close'
                        cv2.putText(frame, hlabel + str("{0:.2f}".format(scores[i])),
                                    (int(left), int(top) - 5),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
                        if status == 'command':
                            actions_invoke(res, score)
                        else:
                            actions_perform(hlabel, path, res, score)

                # Draw bounding boxes and text
                detector_utils.draw_box_on_image(
                    num_hands_detect, score_thresh, scores, boxes, classes,
                    im_width, im_height, frame)

                cv2.imshow('Detection', cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        except KeyboardInterrupt:
            pass
            # print("Average FPS: ", str("{0:.2f}".format(fps)))
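predict() is called above but not defined in this excerpt. Given the output_graph.pb / output_labels.txt pair and the final_result:0 softmax tensor, it very likely follows the standard retrained-Inception lookup, which feeds raw JPEG bytes to the graph's DecodeJpeg input. A sketch under that assumption, reusing the module-level sess, softmax_tensor, and label_lines set up in detect():

```python
def predict(image_data):
    """Assumed helper: classify JPEG bytes with the retrained graph."""
    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})
    # Take the highest-scoring class and return its label and confidence
    best = int(predictions[0].argmax())
    return label_lines[best], predictions[0][best]
```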
def __init__(self):
    '''
    Parameters
    '''
    # Timeout threshold in seconds
    self.time_thresh = rospy.get_param('~time_thresh', 1)
    # Centering threshold in pixels
    self.centering_thresh = rospy.get_param('~centering_thresh', 50)
    # Color thresholds to find orange, BGR
    self.lower = rospy.get_param('~lower_color_threshold', [0, 0, 0])
    self.upper = rospy.get_param('~upper_color_threshold', [255, 255, 255])
    # Camera topic we are pulling images from for processing
    self.camera_topic = rospy.get_param('~camera_topic',
                                        '/camera/front/left/image_rect_color')
    # Number of frames
    self.num_frames = rospy.get_param('~num_frames', 0)
    # Number of objects we detect
    self.num_objects_detect = rospy.get_param('~objects_detected', 2)
    # Minimum confidence score for the detections
    self.score_thresh = rospy.get_param('~score_thresh', 0.1)
    # If we want debug images published or not
    self.debug = rospy.get_param('~debug', True)
    # Camera image width and height
    self.im_width = rospy.get_param('~im_width', 720)
    self.im_height = rospy.get_param('~im_height', 480)
    # Current time
    self.start_time = datetime.datetime.now()
    # Midpoint of the overall image, in pixels
    self.midpoint = [self.im_width / 2, self.im_height / 2]
    # Whether or not we have centered on an object
    self.centered = False

    '''
    Misc Utils, Image Subscriber, and Service Call.
    '''
    # CV bridge for converting from ROS message to cv_image
    self.bridge = CvBridge()
    self.inference_graph, self.sess = detector_utils.load_inference_graph()

    # Subscribes to our image topic, allowing us to process the images
    self.sub1 = rospy.Subscriber(
        self.camera_topic, Image, self.img_callback, queue_size=1)

    '''
    Publishers:
    debug_image_pub: publishes the images showing what TensorFlow has
        identified as path markers.
    orange_image_pub: publishes our masked image, showing what we see
        to be orange.
    direction_pub: publishes the proposed direction of the path marker.
        If our mission script reads this and confirms it is a marker, it
        turns to follow this direction. Either 'left', 'right', or 'none'.
    orange_detection: ensures we are centering on an orange group of pixels.
    path_roi: publishes the region of interest and its coordinates; used
        for debugging purposes.
    '''
    self.debug_image_pub = rospy.Publisher(
        'path_debug', Image, queue_size=1)
    self.orange_image_pub = rospy.Publisher(
        'orange_debug', Image, queue_size=1)
    self.direction_pub = rospy.Publisher(
        'path_direction', String, queue_size=1)
    self.orange_detection = rospy.Publisher(
        'path_orange', String, queue_size=1)
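The lower/upper BGR thresholds and the orange_image_pub above imply a masking step that is not part of this excerpt. A minimal sketch of that step, as an illustrative method body for the class above; the method name is an assumption.

```python
import numpy as np
import cv2

def find_orange(self, cv_image):
    """Hypothetical sketch: mask the frame with the BGR thresholds above."""
    lower = np.array(self.lower, dtype=np.uint8)
    upper = np.array(self.upper, dtype=np.uint8)
    mask = cv2.inRange(cv_image, lower, upper)
    # Keep only the pixels inside the orange range, for orange_image_pub
    return cv2.bitwise_and(cv_image, cv_image, mask=mask)
```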
roslib.load_manifest('easy_markers')
import rospy
from easy_markers.generator import *
from visualization_msgs.msg import Marker
from utils import detector_utils as detector_utils
import cv2
import tensorflow as tf
import datetime
import argparse
# from utils.detector_utils import WebcamVideoStream
import pyrealsense2 as rs
import numpy as np
import matplotlib.pyplot as plt

# cap = cv2.VideoCapture(0)
detection_graph, sess = detector_utils.load_inference_graph()

if __name__ == '__main__':
    rospy.init_node('hand_tracking')
    pub = rospy.Publisher('/hand_mark', Marker, queue_size=10)
    gen = MarkerGenerator()
    rate = rospy.Rate(2)
    gen.type = Marker.SPHERE_LIST
    gen.scale = [.03] * 3
    gen.frame_id = '/camera_color_optical_frame'
    # count = 0

    parser = argparse.ArgumentParser()
    parser.add_argument('-sth', '--scorethreshold', dest='score_thresh',
                        type=float, default=0.2,
                        help='Score threshold for displaying bounding boxes')
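The pyrealsense2 and MarkerGenerator imports above suggest the detected hand pixel gets deprojected into camera-frame 3D and published as a sphere on /hand_mark. The main loop is not shown; a sketch of just that deprojection step, assuming a depth frame aligned to color and the standard pyrealsense2 intrinsics call (the function name and argument list are illustrative):

```python
def publish_hand_marker(u, v, depth_frame, pub, gen):
    """Hypothetical sketch: pixel (u, v) plus depth -> one SPHERE_LIST marker."""
    intrin = depth_frame.profile.as_video_stream_profile().intrinsics
    depth = depth_frame.get_distance(int(u), int(v))
    # Deproject the pixel into a 3D point in the camera's optical frame
    x, y, z = rs.rs2_deproject_pixel_to_point(intrin, [int(u), int(v)], depth)
    pub.publish(gen.marker(points=[(x, y, z)]))
```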
def __init__(self):
    '''
    Parameters
    '''
    # Timeout threshold in seconds
    self.time_thresh = rospy.get_param('~time_thresh', 1)
    # Centering threshold in pixels
    self.centering_thresh = rospy.get_param('~centering_thresh', 50)
    # Color thresholds to find orange, BGR
    self.lower = rospy.get_param('~lower_color_threshold', [0, 30, 100])
    self.upper = rospy.get_param('~upper_color_threshold', [100, 255, 255])
    # Camera topic we are pulling images from for processing
    self.camera_topic = rospy.get_param('~camera_topic',
                                        '/camera/down/image_rect_color')
    # Number of frames
    self.num_frames = rospy.get_param('~num_frames', 0)
    # Number of objects we detect
    self.num_objects_detect = rospy.get_param('~objects_detected', 2)
    # Minimum confidence score for the detections
    self.score_thresh = rospy.get_param('~score_thresh', 0.1)
    # If we want debug images published or not
    self.debug = rospy.get_param('~debug', True)
    # Camera image width and height
    self.im_width = rospy.get_param('~im_width', 720)
    self.im_height = rospy.get_param('~im_height', 480)
    # Current time
    self.start_time = datetime.datetime.now()
    # Midpoint of the overall image, in pixels
    self.midpoint = [self.im_width / 2, self.im_height / 2]
    # Whether or not we have centered on an object
    self.centered = False
    # Whether or not we are enabled
    self.enabled = False

    '''
    Misc Utils, Image Subscriber, and Service Call.
    '''
    # CV bridge for converting from ROS message to cv_image
    self.bridge = CvBridge()
    # Inference graph and session for TensorFlow
    self.inference_graph, self.sess = detector_utils.load_inference_graph()

    # Service call: the on/off switch for this perception file
    rospy.Service('~enable', SetBool, self.toggle_search)

    # Subscribes to our image topic, allowing us to process the images
    self.sub1 = rospy.Subscriber(self.camera_topic, Image,
                                 self.img_callback, queue_size=1)

    '''
    Publishers:
    debug_image_pub: publishes the images showing what TensorFlow has
        identified as path markers.
    orange_image_pub: publishes our masked image, showing what we see
        to be orange.
    direction_pub: publishes the proposed direction of the path marker.
        If our mission script reads this and confirms it is a marker, it
        turns to follow this direction. Either 'left', 'right', or 'none'.
    orange_detection: ensures we are centering on an orange group of pixels.
    path_roi: publishes the region of interest and its coordinates; used
        for debugging purposes.
    '''
    self.debug_image_pub = rospy.Publisher('path_debug', Image, queue_size=1)
    self.orange_image_pub = rospy.Publisher('orange_debug', Image, queue_size=1)
    self.direction_pub = rospy.Publisher('path_direction', String, queue_size=1)
    self.orange_detection = rospy.Publisher('path_orange', String, queue_size=1)
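The ~enable service registered above points at self.toggle_search, which is not included in this excerpt. A minimal sketch of the usual std_srvs SetBool handler, flipping the enabled flag initialised above; SetBoolResponse is assumed to be imported alongside SetBool.

```python
def toggle_search(self, req):
    """Illustrative handler: turn the perception pipeline on or off."""
    self.enabled = req.data
    state = 'enabled' if self.enabled else 'disabled'
    rospy.loginfo('Path marker search %s.', state)
    return SetBoolResponse(success=True, message=state)
```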