Example no. 1
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    while True:
        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # Actual detection: boxes holds the bounding-box coordinates of the
            # detected hands, while scores holds the confidence for each box.
            # Hint: if len(boxes) > 1 you may assume at least one hand was found
            # (within your score threshold).

            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.get_pic(cap_params['num_hands_detect'],
                                   cap_params["score_thresh"], scores, boxes,
                                   cap_params['im_width'],
                                   cap_params['im_height'], frame)
            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
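In the handtracking-style demos a worker like this is usually driven from a small multiprocessing pool that shares an input and an output queue with the capture loop. A minimal wiring sketch, assuming the `worker` defined above and a default webcam; queue sizes, thresholds and pool size are placeholders, not taken from the original script:

# Minimal wiring sketch for the worker above; values below are assumptions.
from multiprocessing import Pool, Queue

import cv2

if __name__ == '__main__':
    input_q = Queue(maxsize=5)
    output_q = Queue(maxsize=5)

    cap = cv2.VideoCapture(0)
    cap_params = {
        'im_width': cap.get(cv2.CAP_PROP_FRAME_WIDTH),
        'im_height': cap.get(cv2.CAP_PROP_FRAME_HEIGHT),
        'score_thresh': 0.2,
        'num_hands_detect': 2,
    }

    # each pool process runs worker(...) as its initializer and loops forever
    pool = Pool(2, worker, (input_q, output_q, cap_params, 0))

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        input_q.put(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        annotated = output_q.get()
        cv2.imshow('hand detection', cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    pool.terminate()
    cap.release()
    cv2.destroyAllWindows()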
Example no. 2
    def __init__(self, haar_cascade_filepath, parent=None):
        ############################### Face initialization #######################
        super().__init__(parent)
        self.classifier = cv2.CascadeClassifier(haar_cascade_filepath)
        self.image = QtGui.QImage()
        self._red = (0, 0, 255)
        self._width = 2
        self._min_size = (50, 50)
        self.l1 = QtWidgets.QLabel()
        ############################### Hand initialization #######################
        self.detection_graph, self.sess = detector_utils.load_inference_graph()
Example no. 3
def worker(input_q, output_q, cropped_output_q, inferences_q, cap_params,
           frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            "./cnn/cnn.h5")
    except Exception as e:
        print(e)

    while True:
        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # Actual detection: boxes holds the bounding-box coordinates of the
            # detected hands, while scores holds the confidence for each box.
            # Hint: if len(boxes) > 1 you may assume at least one hand was found
            # (within your score threshold).
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(cap_params['num_hands_detect'],
                                               cap_params["score_thresh"],
                                               scores, boxes,
                                               cap_params['im_width'],
                                               cap_params['im_height'], frame)

            # draw bounding boxes
            detector_utils.draw_box_on_image(cap_params['num_hands_detect'],
                                             cap_params["score_thresh"],
                                             scores, boxes,
                                             cap_params['im_width'],
                                             cap_params['im_height'], frame)

            # classify hand pose
            if res is not None:
                res = cv2.cvtColor(res, cv2.COLOR_RGB2GRAY)
                res = cv2.resize(res, (28, 28), interpolation=cv2.INTER_AREA)
                class_res = classifier.classify(model, classification_graph,
                                                session, res)
                inferences_q.put(class_res)

            # add the cropped ROI and the annotated frame to their queues
            cropped_output_q.put(res)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
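The classifier output placed on `inferences_q` is assumed to be a vector of class scores; a hedged sketch of a consumer that turns it into a label (the label names below are placeholders, not taken from the original project):

import numpy as np

# Hypothetical consumer of inferences_q; the pose labels are assumptions.
POSE_LABELS = ['fist', 'palm', 'ok', 'peace', 'garbage']

def read_inference(inferences_q):
    """Return the most likely pose name and its score for the latest output."""
    class_res = inferences_q.get()      # assumed: vector of class scores
    idx = int(np.argmax(class_res))
    return POSE_LABELS[idx], float(np.max(class_res))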
Example no. 4
def worker(input_q, output_q, cap_params, frame_processed, points):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    prevPoint = None
    while True:
        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # Actual detection: boxes holds the bounding-box coordinates of the
            # detected hands, while scores holds the confidence for each box.
            # Hint: if len(boxes) > 1 you may assume at least one hand was found
            # (within your score threshold).

            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detectedPoint = detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'], cap_params['im_height'],
                frame)
            point = detectedPoint

            if detectedPoint is not None:
                #print(detectedPoint)
                if prevPoint is not None:
                    if aboveThreshhold(
                            prevPoint,
                            cap_params["im_height"]) and not aboveThreshhold(
                                detectedPoint, cap_params["im_height"]):
                        print("Detected: " + str(detectedPoint) + "Prev: " +
                              str(prevPoint))
                        print("Under")
                        points.put(detectedPoint)
                    #else:
                    #points.put((-1, -1))
                    #callHits.get()(detectedPoint[0], cap_params["im_height"], cap_params["im_width"])
                    #call circle collision thing
                #else:
                #points.put((-1, -1))

                prevPoint = detectedPoint
            #else:
            #points.put((-1, -1))
            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            #points.put((-1, -1))
            output_q.put(frame)
    sess.close()
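`aboveThreshhold` is referenced here but not defined in the snippet; a plausible minimal version, assuming the detected point is an `(x, y)` pixel tuple and "above" means the upper part of the frame:

# Hypothetical helper; the original implementation is not shown above.
def aboveThreshhold(point, im_height, fraction=0.5):
    _, y = point
    return y < im_height * fraction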
Example no. 5
def worker(input_q, output_q, cap_params, frame_processed):
    global called, c, ges, score
    if not called:
        gesture.load_model()
        called = True

    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    while True:
        print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()

        if frame is not None:
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            details = {
                'boxes': boxes,
                'scores': scores
            }
            # draw bounding boxes
            cropped_image = detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['width'], cap_params['height'], frame)

            if cropped_image is not None and c == 0:
                cropped_image = cv2.flip(cropped_image, 1)
                ges, score = gesture.predict(cropped_image/255)
            print(ges, score)
            details['frame'] = frame
            details['cropped_image'] = cropped_image
            details['ges'] = ges
            details['score'] = score

            output_q.put(details)
            frame_processed += 1
        else:   
            output_q.put({
                'boxes': [],
                'frame': frame,
                'ges': ges,
                'score': score
            })
        c = (c + 1) % 10
    sess.close()
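Since this worker emits a dictionary rather than a bare frame, the display side has to unpack it; a minimal consumer sketch (the key names follow the code above, everything else is an assumption):

import cv2

# Hypothetical consumer of the dict-based output queue used by the worker above.
def display_loop(output_q):
    while True:
        details = output_q.get()
        frame = details.get('frame')
        if frame is None:
            continue
        label = '{} ({:.0f}%)'.format(details.get('ges'),
                                      100 * (details.get('score') or 0))
        cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2)
        cv2.imshow('detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break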
Example no. 6
    def __init__(self):
        rospy.init_node('image_detection')
        self.bridge = CvBridge()
        self.inference_graph, self.sess = detector_utils.load_inference_graph()
        self.subscriber = rospy.Subscriber(
            '/camera/front/left/image_rect_color', Image, self.img_callback)
        self.publisher = rospy.Publisher('/vision/dice/debug_rcnn',
                                         Image,
                                         queue_size=1)

        # Parameters
        self.num_frames = 0
        self.num_objects_detect = 1
        self.score_thresh = 0.1
        self.im_width = 1920
        self.im_height = 1080
        self.start_time = datetime.datetime.now()
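The `img_callback` bound in the constructor is not part of this snippet; a minimal sketch of what it might look like, assuming `detector_utils.draw_box_on_image` annotates the frame in place:

    # Hypothetical callback; not taken from the original node.
    def img_callback(self, msg):
        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
        boxes, scores = detector_utils.detect_objects(
            frame, self.inference_graph, self.sess)
        detector_utils.draw_box_on_image(
            self.num_objects_detect, self.score_thresh, scores, boxes,
            self.im_width, self.im_height, frame)
        self.num_frames += 1
        self.publisher.publish(self.bridge.cv2_to_imgmsg(frame, encoding='rgb8'))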
Example no. 7
def worker(input_q, output_q, boxes_q, inferences_q, cap_params,
           frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            "cnn/models/hand_poses_wGarbage_10.h5")
    except Exception as e:
        print(e)

    try:
        while True:
            # print("> ===== in worker loop, frame ", frame_processed)
            frame = input_q.get()
            if (frame is not None):
                # Actual detection: boxes holds the bounding-box coordinates of the
                # detected hands, while scores holds the confidence for each box.
                # Hint: if len(boxes) > 1 you may assume at least one hand was found
                # (within your score threshold).
                boxes, scores = detector.detect_objects(
                    frame, detection_graph, sess)

                # get region of interest
                box, res = detector.get_box_image(
                    cap_params['num_hands_detect'], cap_params["score_thresh"],
                    scores, boxes, cap_params['im_width'],
                    cap_params['im_height'], frame)

                # classify hand pose
                if res is not None:
                    class_res = classifier.classify(model,
                                                    classification_graph,
                                                    session, res)
                    inferences_q.put(class_res)

                output_q.put(frame)
                boxes_q.put(box)
                frame_processed += 1
            else:
                output_q.put(frame)
    except Exception as e:
        print(e)
    sess.close()
Example no. 8
def Worker(input_q, frameNumber_q, worker_q, worker_frame_q, id):
    global TERMINATE
    idleTimer = time.time()  #Saves time at start of last frame
    PROCESSING = False  #Indicates a video stream is being processed
    msg_timer = time.time()
    #Load hand recognition graph
    detection_graph, sess = detector_utils.load_inference_graph()

    while not (PROCESSING and (time.time() - idleTimer) > 10):
        if not input_q.empty():
            PROCESSING = True  #Becomes true when at least one frame has been received
            idleTimer = time.time()

            input_cv.acquire()
            while input_q.empty():
                if TERMINATE:
                    input_cv.release()
                    break
                input_cv.wait()
            if TERMINATE:
                break
            image_np = input_q.get()
            frameNumber = frameNumber_q.get()
            input_cv.notifyAll()
            input_cv.release()

            image_np = cv2.cvtColor(
                image_np,
                cv2.COLOR_BGR2RGB)  #Convert frame to correct color format

            boxes, scores = detector_utils.detect_objects(
                image_np, detection_graph, sess)  #Detect hands

            frame_cv.acquire()

            while frameNumber != next_frame:
                frame_cv.wait()
            worker_q.put([boxes, scores])

            if DISPLAY_OUTPUT or SAVE_OUTPUT:
                worker_frame_q.put(image_np)
            frame_cv.release()

    print("Worker " + str(id) + " exited")
Example no. 9
    def __init__(self):
        rospy.init_node('dice_detection')
        self.bridge = CvBridge()
        self.subscriber = rospy.Subscriber(
            '/camera/front/left/image_rect_color', Image, self.img_callback)
        self.publisher = rospy.Publisher(
            '/dice/debug_rcnn', Image, queue_size=1)

        self.dice_publisher = rospy.Publisher(
            '/dice/points', Point, queue_size=1)

        self.inference_graph, self.sess = detector_utils.load_inference_graph()
        # Parameters
        self.num_frames = 0
        self.num_objects_detect = 4
        self.score_thresh = 0.1
        self.im_width = 1920
        self.im_height = 1080
        self.start_time = datetime.datetime.now()
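The dice node's `img_callback` is likewise not shown; a sketch of how the detected boxes might be turned into pixel-space centroids and published, assuming the usual TensorFlow object-detection box order (ymin, xmin, ymax, xmax) in normalized coordinates and that `Point` is already imported:

    # Hypothetical callback; not taken from the original node.
    def img_callback(self, msg):
        frame = self.bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
        boxes, scores = detector_utils.detect_objects(
            frame, self.inference_graph, self.sess)
        for i in range(self.num_objects_detect):
            if scores[i] < self.score_thresh:
                continue
            top, left, bottom, right = boxes[i]
            cx = (left + right) / 2.0 * self.im_width
            cy = (top + bottom) / 2.0 * self.im_height
            self.dice_publisher.publish(Point(x=cx, y=cy, z=0.0))
        self.num_frames += 1
        self.publisher.publish(self.bridge.cv2_to_imgmsg(frame, encoding='rgb8'))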
Example no. 10
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    while True:
        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"], scores, boxes, cap_params['im_width'], cap_params['im_height'], frame)
            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
Example no. 11
def worker_hand(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph(
        r'/home/testuser/tf_tut/obj_detection/handtracking/hand_inference_graph/frozen_inference_graph.pb'
    )
    while True:
        print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # actual detection
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)
            # draw bounding boxes
            detector_utils.draw_box_on_image(cap_params['num_hands_detect'],
                                             cap_params["score_thresh"],
                                             scores, boxes,
                                             cap_params['im_width'],
                                             cap_params['im_height'], frame)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
            break
    sess.close()
Example no. 12
def gesture(st="y"):
    detection_graph, sess = detector_utils.load_inference_graph()
    parser = argparse.ArgumentParser()
    parser.add_argument('-sth', '--scorethreshold', dest='score_thresh', type=float,
                        default=0.2, help='Score threshold for displaying bounding boxes')
    parser.add_argument('-fps', '--fps', dest='fps', type=int,
                        default=1, help='Show FPS on detection/display visualization')
    parser.add_argument('-src', '--source', dest='video_source',
                        default=0, help='Device index of the camera.')
    parser.add_argument('-wd', '--width', dest='width', type=int,
                        default=320, help='Width of the frames in the video stream.')
    parser.add_argument('-ht', '--height', dest='height', type=int,
                        default=180, help='Height of the frames in the video stream.')
    parser.add_argument('-ds', '--display', dest='display', type=int,
                        default=1, help='Display the detected images using OpenCV. This reduces FPS')
    parser.add_argument('-num-w', '--num-workers', dest='num_workers', type=int,
                        default=4, help='Number of workers.')
    parser.add_argument('-q-size', '--queue-size', dest='queue_size', type=int,
                        default=5, help='Size of the queue.')
    args = parser.parse_args()
    #return('gesture')
    #global testDir
    #global img_rows, img_cols, maxFrames
    #global depthFrame
#crop parameter
    #global xupam
    #global yupam
    #global xdpam
    #global ydpam
    #global classGest
    #global delayGest
    #global delayBol
#load the model and weight
    #global json_file
    #global loaded_model_json
    #global loaded_model
#setup cv face detection
    #global face_cascade
# setup Realsense
# Configure depth and color streams
# for text purpose
    global handin
    global txt
    global txtLoad
    global txtDelay
    global txtRecord
    global txtDel
    global txtProbability
    global font
    global to_reload
    global app
    global currentState
    #global framearray
    #global ctrDelay
    #global channel
    #global gestCatch
    #global gestStart
    #global x,y,w,h
    #global vc
    #global rval , firstFrame
    #global heightc, widthc, depthcol
    #global imgTxt
    #global resultC
    #global count
    #global stat
    pipeline = rs.pipeline()
    K.clear_session()
    testDir = "videoTest/"
    img_rows, img_cols, maxFrames = 50, 50, 55
    depthFrame = 0
#crop parameter
    xupam = 350
    yupam = 200

    xdpam = 250
    ydpam = 300
    depthFrame = 0
    cameraHeightR = 480
    #cameraWidthR = 848
    cameraWidthR = 848
    frameRateR = 60
    #classGest = ['1','11','12','13','4','5','7','8']
    classGest = ['1', '12', '13', '14', '15', '3', '4', '5', '8', '9']
    nameGest = ['call', 'scroll up', 'scroll down', 'right', 'left', "like", 'play/pause', 'close', 'click', 'back']
    delayGest = 10
    delayBol = False
    framearray = []
    ctrDelay = 0
    channel = 1
    gestCatch = False
    gestStart = False
    backgroundRemove = True
    x,y,w,h = 0,0,0,0
    count=0
    boxCounter = True
#load the model and weight
    json_file = open('3dcnnresult/34/3dcnnmodel.json', 'r')

    loaded_model_json = json_file.read()
    json_file.close()
    # load weights into new model
    loaded_model = model_from_json(loaded_model_json)
    #loaded_model.load_weights("3dcnnresult/3dcnnmodel.hd5")
    loaded_model.load_weights("3dcnnresult/34/3dcnnmodel.hd5")
    loaded_model.compile(loss=categorical_crossentropy,
                     optimizer='adam', metrics=['accuracy'])
#setup cv face detection
    conf = loaded_model.model.get_config()


    shapeInput, ap = loaded_model.model.get_layer(name="dense_2").input_shape
    shapeOutput, sp = loaded_model.model.get_layer(name="dense_2").output_shape
    weightDense2 = loaded_model.model.get_layer(name="dense_2").get_weights()[0]
    weightDense22I = loaded_model.model.get_layer(name="dense_2").get_weights()[1]
    weightDense22A = loaded_model.model.get_layer(name="dense_2").get_weights()[1]
    face_cascade = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_alt2.xml')
    if(st=="x"):
        print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
        '''
        profiles = config.resolve(pipeline)  
        device = profiles.get_device()
        ctx = rs.context()
        device.hardware_reset()
        gesture() 
        '''
        ytfsmfile = "youtube_fsm.txt"
        ytFsmTable = loadFSM(ytfsmfile)
    else:
        ytfsmfile = "fb_fsm.txt"
        ytFsmTable = loadFSM(ytfsmfile)
    updatedWeight = False
    updatedWeight2 = False
    config = rs.config()
    config.enable_stream(rs.stream.depth, cameraWidthR, cameraHeightR, rs.format.z16, frameRateR)
    config.enable_stream(rs.stream.color, cameraWidthR, cameraHeightR, rs.format.bgr8, frameRateR)
    '''vc = cv2.VideoCapture(0)
    rval , firstFrame = vc.read()
    heightc, widthc, depthcol = firstFrame.shape
    imgTxt = np.zeros((heightc, 400, 3), np.uint8)
    #print('tryyyyy1')'''
    stat = 0

        
#        some_queue.put("something")
           
    # Start streaming
    profile = pipeline.start(config)

    # Getting the depth sensor's depth scale (see rs-align example for explanation)
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()
    # print "Depth Scale is: " , depth_scale

    # We will be removing the background of objects more than
    #  clipping_distance_in_meters meters away
    clipping_distance_in_meters = 2  # clip everything beyond 2 meters
    clipping_distance = clipping_distance_in_meters / depth_scale

    #print("tes====================")
    # for text purpose
    imgTxt = np.zeros((480, 400, 3), np.uint8)
    txt = "OpenCV"
    txtLoad = "["
    txtDelay = "["
    txtRecord = "Capture"
    txtDel = "Delay"
    txtProbability = "0%"
    font = cv2.FONT_HERSHEY_SIMPLEX

    align_to = rs.stream.color
    align = rs.align(align_to) 
#print("Loaded model from disk")
    count=0
    num_frames = 0
    im_width, im_height = (848, 480)
    # max number of hands we want to detect/track
    num_hands_detect = 1
    score_thresh = 0.37
    while True:
        if True:
            dataImg = []

            # Wait for a coherent pair of frames: depth and color
            
            try:
                frames = pipeline.wait_for_frames()
            
                # Align the depth frame to color frame
                aligned_frames = align.process(frames)
    
                #depth_frame = frames.get_depth_frame()
                #color_frame = frames.get_color_frame()
    
                # Get aligned frames
                aligned_depth_frame = aligned_frames.get_depth_frame()  # aligned depth image (848x480, matching the configured stream)
                color_frame = aligned_frames.get_color_frame()
                
                depth_image = np.asanyarray(aligned_depth_frame.get_data())
                color_image = np.asanyarray(color_frame.get_data())
                # Validate that both frames are valid
                if not aligned_depth_frame or not color_frame:
                    continue
            
            except:
                pipeline.stop()
                print("Error wait_for_frames wait_for_frames")
                gesture()
            
            num_frames += 1
            image_np = color_image
            try:
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
            except:
                print("Error converting to RGB")
            boxes, scores = detector_utils.detect_objects(
                    image_np, detection_graph, sess)
            for i in range(num_hands_detect):
                if (scores[i] > score_thresh):
                    handin = True
                    
                    #print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
                    break
            if handin == True:
                gestStart = True
            else:
                gestStart = False 
            # if not depth_frame or not color_frame:
            #if not color_frame:
                #continue

            # Convert images to numpy arrays
            #depth_image = np.asanyarray(depth_frame.get_data())
            #color_image = np.asanyarray(color_frame.get_data())

            if(backgroundRemove == True):
                # Remove background - Set pixels further than clipping_distance to grey
                #grey_color = 153
                grey_color = 0
                depth_image_3d = np.dstack((depth_image, depth_image, depth_image))  # depth image is 1 channel, color is 3 channels
                bg_removed = np.where((depth_image_3d > clipping_distance) | (depth_image_3d <= 0), grey_color, color_image)
                #draw_image = bg_removed
                color_image = bg_removed
                draw_image = color_image
            else:
                draw_image = color_image

            #face detection here
            gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
#            faces = face_cascade.detectMultiScale(gray, 1.1, 2)
#
#            if len(faces) > 0 and gestCatch == False:
#                gestStart = True
#                x, y, w, h = faces[0]
#            else:
#                x, y, w, h = x, y, w, h
#
#            fArea = w*h
            
            if gestCatch == False:
                faces = face_cascade.detectMultiScale(gray, 1.1, 2)
                #print("face: ",len(faces))

                ctr = 0
                idxFace = -1
                minDist = 9999

                if len(faces) > 0:
                    for f in faces:
                        xh, yh, wh, hh = f
                        farea = wh * hh

                        midxf = int(xh + (wh * 0.5))
                        midyf = int(yh + (hh * 0.5))

                        depth_imageS = depth_image * depth_scale
                        deptRef = depth_imageS.item(midyf, midxf)

                        if deptRef <= minDist:
                            idxFace = ctr
                            minDist = deptRef

                        ctr = ctr+1
                    #print("id face", idxFace)

                    if idxFace >= 0:
                        x, y, w, h = faces[idxFace]
                        #cv2.rectangle(draw_image, (x, y), (x + w, y + h), (255, 0, 0), 2)
            #print(fArea)

            #if fArea > 3000:

            # crop the face then pass to resize
            #cv2.rectangle(draw_image, (x, y), (x + w, y + h), (255, 0, 0), 2)

            midx = int(x + (w * 0.5))
            midy = int(y + (h * 0.5))

            xUp = (x - (w * 3))
            yUp = (y - (h * 1.5))

            xDn = ((x + w) + (w * 1))
            yDn = ((y + h) + (h * 2))

            if xUp < 1: xUp = 0
            if xDn >= 848: xDn = 848

            if yUp < 1: yUp = 0
            if yDn >= 480: yDn = 480

            #cv2.rectangle(draw_image, (xUp.__int__(), yUp.__int__()), (xDn.__int__(), yDn.__int__()), (0, 0, 255), 2)

            #cv2.circle(draw_image, (midx.__int__(), midy.__int__()), 10, (255, 0, 0))
#            roi_color = color_image[yUp.__int__():yDn.__int__(), xUp.__int__():xDn.__int__()]
            roi_gray = gray[yUp.__int__():yDn.__int__(), xUp.__int__():xDn.__int__()]
            #find the depth of middle point of face
#            if handin == True and depthFrame==10:
#                depth_imageS = depth_image * depth_scale
#                deptRef = depth_imageS.item(midy, midx)
#                boxColor = color_image.copy()
#                checkhandIn(boxCounter, deptRef, midx, midy, w, h, depth_imageS, boxColor, draw_image,handin)
            if backgroundRemove == True and gestCatch == False:

                depth_imageS = depth_image*depth_scale
                deptRef = depth_imageS.item(midy, midx)
                #print(clipping_distance)

                clipping_distance = (deptRef + 0.2)/depth_scale

                boxColor = color_image.copy()

#                handin = checkhandIn(boxCounter, deptRef, midx, midy, w, h, depth_imageS, boxColor, draw_image)

#                if handin == True:
#                    gestStart = True
#                else:
#                    gestStart = False
                #print("handinnnnnnnnnnnn "+str(gestStart))
            if delayBol == False and gestStart == True:

                if depthFrame < maxFrames:
#                    frame = cv2.resize(roi_color, (img_rows, img_cols))
                    frame = cv2.resize(roi_gray, (img_rows, img_cols))
                    framearray.append(frame)
                    depthFrame = depthFrame + 1
                    txtLoad = txtLoad + "["
                    count=count+1
                    gestCatch = True

                    #print(depthFrame)


                if depthFrame == maxFrames:
                    dataImg.append(framearray)
                    xx = np.array(dataImg).transpose((0, 2, 3, 1))
                    X = xx.reshape((xx.shape[0], img_rows, img_cols, maxFrames, channel))
                    X = X.astype('float32')
                    #print('X_shape:{}'.format(X.shape))
                    #==================== Update the Weight =======================================
                    newWeightI = []
                    newWeightA = []
                    availableGest = getGesture(currentState, ytFsmTable)
                    availG, ignoreG = translateAvailableGest(availableGest, classGest)
                    if updatedWeight:
                        weightI = manipWeight(weightDense22I, ignoreG, 1000)

                        newWeightI.append(weightDense2)
                        newWeightI.append(weightI)

                    if updatedWeight2:
                        maxClass = 10
                        weightA = manipWeight(weightDense22A, availG, 1000)

                        newWeightA.append(weightDense2)
                        newWeightA.append(weightA)

                    #=================================================================================

                    if updatedWeight == False and updatedWeight2 == False:
                        newWeightI.append(weightDense2)
                        newWeightI.append(weightDense22A)

                    loaded_model.model.get_layer(name="dense_2").set_weights(newWeightI)
                    if handin==True:
                        # do prediction
                        resc = loaded_model.predict_classes(X)[0]
                        res = loaded_model.predict_proba(X)[0]
                        
                        # find the result of prediction gesture
                        resultC = classGest[resc]
                        nameResultG = nameGest[resc]

                        for a in range(0, len(res)):
                            print("Gesture {}: {} ".format(str(nameGest[a]), str(res[a] * 100)))
                    else:
                        resultC = 0
                        nameResultG = "not enough frame recorded"
                    print("===============================================================")

                    if updatedWeight2:
                        loaded_model.model.get_layer(name="dense_2").set_weights(newWeightA)

                        # do prediction
                        resc2 = loaded_model.predict_classes(X)[0]
                        res2 = loaded_model.predict_proba(X)[0]

                        # find the result of prediction gesture
                        resultC2 = classGest[resc2]
                        nameResultG2 = nameGest[resc2]
                    #imgTxt = np.zeros((480, 400, 3), np.uint8)                    
                    if updatedWeight2:
                        if res2[resc2] > res[resc]:
                            txt = "ignored gesture"
                            act = -1
                        else:
                            txt = nameResultG
                            act = resultC
                    else:
                        txt = nameResultG
                        act = resultC
                    #show text

                    # check with FSM for finding the next state
                    currentState = getNextState(currentState, act, ytFsmTable)
                    txt = "Gesture-" + str(resultC)
#                    txtProbability = str(res[resc]*100)+"%"

                    framearray = []
                    #dataImg = []
                    txtLoad = ""
                    depthFrame = 0
                    handin = False
                    gestCatch = False
                    delayBol = True

                #cv2.putText(imgTxt, txtLoad, (10, 20), font, 0.1, (255, 255, 255), 2, cv2.LINE_AA)
                #.putText(imgTxt, txtRecord, (10, 50), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                #cv2.putText(imgTxt, txt, (10, 200), font, 2, (255, 255, 255), 2, cv2.LINE_AA)
                #cv2.putText(imgTxt, txtProbability, (10, 250), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
            # Apply colormap on depth image (image must be converted to 8-bit per pixel first)
            # depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)

            #print(delayBol)
            if delayBol == True:
                ctrDelay = ctrDelay+1
                txtDelay = txtDelay + "["
                #txtDel = "Delay"
                #cv2.putText(imgTxt, txtDelay, (10, 70), font, 0.1, (255, 255, 255), 2, cv2.LINE_AA)
                #cv2.putText(imgTxt, txtDel, (10, 100), font, 1, (255, 255, 255), 2, cv2.LINE_AA)
                if ctrDelay == delayGest:
                    ctrDelay = 0
                    txtDelay = ""
                    delayBol = False

            # Stack both images horizontally
            #images = np.hstack((draw_image, imgTxt))

            # put the text here

            # Show images
            #cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
            #cv2.imshow('RealSense', images)
            #cv2.waitKey(1)
            
            if count==maxFrames:
                #vc.release()
                K.clear_session()
                pipeline.stop()
                sess.close()
                return act
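`loadFSM` and `getNextState` are used above but not included in the snippet; a minimal sketch under the assumption that the FSM file holds one `state gesture next_state` triple per line (the real file format may differ, and the other helpers such as `getGesture`, `translateAvailableGest` and `manipWeight` are likewise omitted):

# Minimal sketch only; the real FSM helpers are not shown in the example.
def loadFSM(path):
    """Read 'state gesture next_state' triples into a lookup table."""
    table = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) == 3:
                state, gesture_id, next_state = parts
                table[(state, gesture_id)] = next_state
    return table


def getNextState(current_state, gesture_id, table):
    """Look up the next state; stay in the current state on unknown input."""
    return table.get((str(current_state), str(gesture_id)), current_state)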
Example no. 13
def get_inference_vector_one_frame_alphabet():
    # model trained based on https://www.kaggle.com/mrgeislinger/asl-rgb-depth-fingerspelling-spelling-it-out

    model = HandShapeFeatureExtractor.get_instance()
    vectors = []
    video_names = []
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    count = 0
    myFiles = glob.glob('data/*.mp4')
    for file in myFiles:
        # if os.path.exists('{}'.format(file[:-4])):
        #     os.remove('{}'.format(file[:-4]))
        os.mkdir('{}'.format(file[:-4]))
        cam = cv2.VideoCapture("{}".format(file))
        currentframe = 0
        path = '{}/'.format(file[:-4])
        bug = 0
        while(True):

            # reading from frame
            ret,frame = cam.read()

            if ret:
                # if video is still left continue creating images
                name = 'frame' + str(currentframe) + '.png'
                # print ('Creating...' + name)

                if (currentframe + bug) % 30 == 0:
                    # writing the extracted images
                    #cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    bug = 0
                    image_np = frame
                    image_np = cv2.rotate(image_np, cv2.ROTATE_90_COUNTERCLOCKWISE)
                    h, w, c = image_np.shape
                    im_width = w
                    im_height = h

                    try:
                        image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                        # Actual detection: boxes holds the bounding-box coordinates of the
                        # detected hands, while scores holds the confidence for each box.
                        # Hint: if len(boxes) > 1 you may assume at least one hand was found
                        # (within your score threshold).

                        boxes, scores = detector_utils.detect_objects(image_np,
                                                                    detection_graph, sess)

                        # draw bounding boxes on frame
                        cropped_image = detector_utils.draw_box_on_image(1, 0.2,
                                                        scores, boxes, im_width, im_height,
                                                        image_np)

                        img = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2GRAY)
                        results = model.extract_feature(img)
                        results = np.squeeze(results)
                        vectors.append(results)
                        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2BGR)
                        cv2.imwrite(path + name, cropped_image)
                    except:
                        bug += 1

                    # increasing counter so that it will
                    # show how many frames are created
                currentframe += 1
            else:
                break

        # Release all space and windows once done
        cam.release()
        cv2.destroyAllWindows()

    sess.close()
    return vectors
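A hedged usage sketch for the feature vectors returned above: comparing a test vector against a set of gallery vectors with cosine similarity (the matching step is an assumption and is not part of the original snippet):

import numpy as np

# Hypothetical matching step; gallery_vectors would come from a second set of videos.
def best_match(test_vector, gallery_vectors):
    """Index of the gallery vector with the highest cosine similarity."""
    sims = [
        np.dot(test_vector, g) / (np.linalg.norm(test_vector) * np.linalg.norm(g))
        for g in gallery_vectors
    ]
    return int(np.argmax(sims))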
Example no. 14
def mono_hand_loop(acq,
                   outSize,
                   config,
                   track=False,
                   paused=False,
                   with_renderer=False):

    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)
    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"],
                                config["output_layer"])

    detection_graph, sess = detector_utils.load_inference_graph()

    left_hand_model = config["model_left"]
    started = True
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]
    print("Entering main Loop.")
    # bbox = config['default_bbox']
    while True:
        print('Estimating pose for frame {}'.format(count))
        try:
            imgs, clbs = acq.grab()

            if imgs is None or len(imgs) == 0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        #print(bgr)
        clb = clbs[0]

        # compute kp using model initial pose
        points2d = pose_estimator.ba.decodeAndProject(
            pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        score = -1
        result_pose = None
        crop_viz = None
        # STEP2 detect 2D joints for the detected hand.
        if started:
            if bbox is None:
                bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                # bbox = config['default_bbox']
                if bbox is None:
                    cv2.imshow("2D CNN estimation", bgr)
                    cv2.waitKey(1)
                    # to_AC_format(np.zeros((21,3)))
                    # count+=1
                    continue

            else:
                dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if dbox is not None:
                    x, y, w, h = bbox
                    x1, y1, w1, h1 = dbox
                    if (x1 > x + w
                            or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        bbox = dbox
                        print("updated")

            x, y, w, h = bbox
            # print("BBOX: ",bbox)
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)
            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale,
                                 scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand

                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if track:
                    keypoints[mask, :2] = oldKp[mask]

                keypoints[:, 2] = keypoints[:, 2]**3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb,
                                        keypoints)
                obsVec = IK.ObservationsVector([
                    rgbKp,
                ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)

                # pose_estimator.print_report()

                if track:
                    result_pose = list(
                        smoothing * np.array(pose_estimator.model.init_pose) +
                        (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if track:
                    if -1 < score:  #< 150:
                        pose_estimator.model.init_pose = Core.ParamVector(
                            result_pose)
                    else:
                        print("\n===>Reseting init position for IK<===\n")
                        pose_estimator.model.reset_pose()
                        bbox = None

                if score > -1:  # compute result points
                    p2d = np.array(
                        pose_estimator.ba.decodeAndProject(
                            Core.ParamVector(result_pose),
                            clb)).reshape(-1, 2)
                    # scale = w/config.boxsize
                    bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d,
                                                    hand,
                                                    thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 /
                (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)

            if with_renderer:
                hand_visualizer.render(pose_estimator.model,
                                       Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz,
                                   "Hand",
                                   bbox,
                                   box_color=(0, 255, 0),
                                   text_color=(200, 200, 0))

        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1,
                    cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            pose_estimator.model.reset_pose()
            bbox = None
            print("RESETING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        # decode the full 3D pose only when the optimizer produced a result
        if result_pose is not None:
            p3d = np.array(
                pose_estimator.ba.decode(Core.ParamVector(result_pose),
                                         clb)).reshape(-1, 3)
        count += 1
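The loop above reads a number of keys from `config`; a sketch of the kind of dictionary it expects (only the key names come from the code above, every value below is a placeholder):

# Placeholder values; only the key names are taken from mono_hand_loop().
config = {
    "model_file": "models/mva19_hand_keypoints.pb",  # assumed path
    "input_layer": "input_1",                        # assumed tensor name
    "output_layer": "k2tfout_0",                     # assumed tensor name
    "model_left": False,
    "boxsize": 224,
    "stride": 4,
    "peaks_thre": 0.1,
    "smoothing": 0.2,
}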
Example no. 15
import argparse
from datetime import date

import numpy as np
import xlrd
from xlutils.copy import copy

# detector_utils import path as laid out in the handtracking project (assumed)
from utils import detector_utils

lst1 = []
lst2 = []
ap = argparse.ArgumentParser()
ap.add_argument(
    '-d',
    '--display',
    dest='display',
    type=int,
    default=1,
    help='Display the detected images using OpenCV. This reduces FPS')
args = vars(ap.parse_args())

model = detector_utils.load_inference_graph()


def save_data(no_of_time_hand_detected, no_of_time_hand_crossed):

    try:
        today = date.today()
        today = str(today)
        #loc = (r'C:\Users\rahul.tripathi\Desktop\result.xls')

        rb = xlrd.open_workbook('result.xls')
        sheet = rb.sheet_by_index(0)
        sheet.cell_value(0, 0)

        #print(sheet.nrows)
        q = sheet.cell_value(sheet.nrows - 1, 1)
Example no. 16
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    model = tf.keras.models.load_model('./saved_model2/')

    #################
    # all the following variables store data for sliding window
    sw_size = 30 # sliding window size
    trajecX = [] # trajectory x coord
    trajecY = [] # trajectory y coord
    preds = [] # classified classes 
    preds_freq = np.zeros(5) # frequency of each class
    #################

    screenWd, screenHt = pyautogui.size()
    frameWd, frameHt = cap_params['im_width'], cap_params['im_height']

    scaleX = 1.0 * screenWd / frameWd
    scaleY = 1.0 * screenHt / frameHt

    state = -1
    rad_in, rad_mid, rad_out = 25, 90, 200  # radii of the annulus around the last centroid
    last_swipe, swipe_cnt = -1, 0  # sanity check so the same swipe is not triggered twice in a row

    while True:
        # print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()

        if (frame is not None):
            # print(nm, "Not None")
            im_width = cap_params['im_width']
            im_height = cap_params['im_height']

            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get cropped image
            cropped_img = detector_utils.get_box_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'], cap_params['im_height'],
                frame)


            # draw bounding boxes
            centroidX, centroidY = detector_utils.draw_box_on_image(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'], cap_params['im_height'],
                frame)


            ## enable this if only want to visualize the bounding box
            ## and blur rest of the image
            # visualize_blurred(cap_params['num_hands_detect'], cap_params['score_thresh'], scores, boxes, cap_params['im_width'], cap_params['im_height'], frame)

            # negative centroid means no hand is detected
            if centroidX < 0:
                frame_processed += 1
                output_q.put(frame)
                # cropped_output_q.put(cropped_img)
                continue

            # Update the centroid and add it to the trajectory if needed
            if centroidX > 0:
                if len(trajecX) == 0:
                    trajecX.append(centroidX)
                    trajecY.append(centroidY)
                else:
                    px, py = trajecX[-1], trajecY[-1]
                    d = dist(centroidX, centroidY, px, py)
                    if d <= rad_in or (d > rad_mid and d < rad_out):
                        centroidX, centroidY = px, py

                    trajecX.append(centroidX)
                    trajecY.append(centroidY)

                    
            # move sliding window
            if len(trajecX) > sw_size:
                trajecX = trajecX[-sw_size:]
                trajecY = trajecY[-sw_size:]


            # draw centroid
            if len(trajecX) > 0:
                x, y = trajecX[-1], trajecY[-1]

                # frame = visualize_blurred(cap_params['num_hands_detect'], cap_params['score_thresh'], scores, boxes, cap_params['im_width'], cap_params['im_height'], frame)
                # visualize_roi(cap_params['num_hands_detect'], (x, y), 
                        # cap_params['score_thresh'], scores, 
                        # rad_in, rad_mid, rad_out, frame)

                detector_utils.draw_centroid_on_image(
                    cap_params['num_hands_detect'], (x, y), cap_params["score_thresh"],
                    scores, frame)


            # Static gesture prediction
            if cropped_img is not None:
                cropped_img, idx = classify(cropped_img, model)
                detector_utils.draw_label_on_image(mp[idx], frame)
                preds.append(idx)
                preds_freq[idx] += 1

                if len(preds) > sw_size:
                    preds_freq[preds[0]] -= 1
                    preds = preds[-sw_size:]

                if preds[-1] == 1 or preds[-1] == 3:
                    state = control_vlc(last_swipe, preds[-1], state)


                # Control mouse pointer
                # if len(trajecX) > 1:
                    # x = int((trajecX[-1] - trajecX[-2]) )
                    # y = int((trajecY[-1] - trajecY[-2]) )
                    # control_mouse_pointer(x, y, screenWd, screenHt, preds[-1], preds[-2])

            # Dynamic gesture prediction
            if len(trajecX) > 0:
                draw_trajectory(trajecX, trajecY, frame)
                p = probab(preds_freq)
                d = dist(trajecX[-1], trajecY[-1], trajecX[0], trajecY[0])
                # print(d, p)

                if p > 0.5 and d > 110:
                    direc = get_direction(trajecX[-1], trajecY[-1], trajecX[0], trajecY[0])
                    if not last_swipe == direc:
                        # print("swipe {} count: ".format(mp2[last_swipe]), swipe_cnt)
                        swipe_cnt = 0
                    else:
                        swipe_cnt += 1

                    last_swipe = direc

                    if swipe_cnt % 6 == 0 and swipe_cnt > 0:
                        print("swipe {} count: ".format(mp2[last_swipe]), swipe_cnt)
                        state = control_vlc(last_swipe, preds[-1], state)


                    detector_utils.draw_label_on_image(mp2[direc], frame, (500, 100))

                

            frame_processed += 1
            output_q.put(frame)
            # cropped_output_q.put(cropped_img)
        else:
            output_q.put(frame)
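The helpers `dist`, `probab` and `get_direction` used by this worker are not shown; plausible minimal versions, under the assumption that directions are encoded as small integers keyed into `mp2`:

import math

import numpy as np

# Plausible stand-ins; the original helpers are not part of the snippet.
def dist(x1, y1, x2, y2):
    """Euclidean distance between two points."""
    return math.hypot(x1 - x2, y1 - y2)


def probab(preds_freq):
    """Fraction of the sliding window occupied by the most frequent class."""
    total = np.sum(preds_freq)
    return 0.0 if total == 0 else float(np.max(preds_freq)) / total


def get_direction(x1, y1, x0, y0):
    """Encode the dominant displacement as 0/1/2/3 (right/left/down/up)."""
    dx, dy = x1 - x0, y1 - y0
    if abs(dx) >= abs(dy):
        return 0 if dx > 0 else 1
    return 2 if dy > 0 else 3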
Example no. 17
def main():
    currentPath = ''
    currentExample = ''

    print('Do you want to : \n 1 - Add new pose \
                            \n 2 - Add examples to existing pose \
                            \n 3 - Add garbage examples')

    menu_choice = input()
    while menu_choice != '1' and menu_choice != '2' and menu_choice != '3':
        print('Please enter 1, 2 or 3')
        menu_choice = input()

    if menu_choice == '1':
        print('Enter a name for the pose you want to add :')
        name_pose = input()

        # Create folder for pose 
        if not os.path.exists('Poses/' + name_pose):
            os.makedirs('Poses/' + name_pose + '/' + name_pose + '_1/')
            currentPath = 'Poses/' + name_pose + '/' + name_pose + '_1/'
            currentExample = name_pose + '_1_'

    elif menu_choice == '2':
        # Display current poses
        dirs = os.listdir('Poses/')
        dirs.sort()
        dirs_choice = ''
        possible_choices = []
        i = 1
        for _dir in dirs:
            dirs_choice += str(i) + ' - ' + str(_dir) + ' / '
            possible_choices.append(str(i))
            i += 1

        # Ask user to choose to which pose to add examples
        print('Choose one of the following pose:')
        print(dirs_choice)
        choice = input()
        while choice not in possible_choices or dirs[int(choice) - 1] == 'garbage':
            print('Please enter one of the following (not garbage): ' + str(possible_choices))
            choice = input()

        # Count number of files to increment new example directory
        subdirs = os.listdir('Poses/' + dirs[int(choice) - 1] + '/')
        index = len(subdirs) + 1

        # Create new example directory
        if not os.path.exists('Poses/' + dirs[int(choice) - 1] + '/' + dirs[int(choice) - 1] + '_' + str(index) + '/'):
            os.makedirs('Poses/' + dirs[int(choice) - 1] + '/' + dirs[int(choice) - 1] + '_' + str(index) + '/')

            # Update current path
            currentPath = 'Poses/' + dirs[int(choice) - 1] + '/' + dirs[int(choice) - 1] + '_' + str(index) + '/'
            currentExample = dirs[int(choice) - 1] + '_' + str(index) + '_'
            name_pose = dirs[int(choice) - 1]

    elif menu_choice == '3':
        # Create folder for pose 
        if not os.path.exists('Poses/Garbage/'):
            os.makedirs('Poses/Garbage/Garbage_1/')
            currentPath = 'Poses/Garbage/Garbage_1/'
            currentExample = 'Garbage_1_'
            name_pose = 'Garbage'
        else:
            # Count number of files to increment new example directory
            subdirs = os.listdir('Poses/Garbage/')
            index = len(subdirs) + 1
            # Create new example directory
            if not os.path.exists('Poses/Garbage/Garbage_' + str(index) + '/'):
                os.makedirs('Poses/Garbage/Garbage_' + str(index) + '/')

                # Update current path
                currentPath = 'Poses/Garbage/Garbage_' + str(index) + '/'
                currentExample = 'Garbage_' + str(index) + '_'
                name_pose = 'Garbage'

    print('You\'ll now be prompted to record the pose you want to add. \n \
                Please place your hand beforehand facing the camera, and press any key when ready. \n \
                When finished press \'q\'.')
    input()

    # Begin capturing
    cap = cv2.VideoCapture(0)

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(currentPath + name_pose + '.avi', fourcc, 25.0, (640, 480))

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # write the frame
            out.write(frame)

            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    # Release everything if job is finished
    cap.release()
    out.release()
    cv2.destroyAllWindows()

    vid = cv2.VideoCapture(currentPath + name_pose + '.avi')

    # Check that the video opened correctly
    if not vid.isOpened():
        print('Error opening video stream or file')
        return

    print('>> loading frozen model..')
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    print('>> model loaded!')

    _iter = 1
    # Read until video is completed
    while vid.isOpened():
        # Capture frame-by-frame
        ret, frame = vid.read()
        if ret:
            print('   Processing frame: ' + str(_iter))
            # Resize and convert to RGB for NN to work with
            frame = cv2.resize(frame, (320, 180), interpolation=cv2.INTER_AREA)

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect object
            boxes, scores = detector_utils.detect_objects(frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(1, 0.2, scores, boxes, 320, 180, frame)

            # Save cropped image 
            if res is not None:
                cv2.imwrite(currentPath + currentExample + str(_iter) + '.png', cv2.cvtColor(res, cv2.COLOR_RGB2BGR))

            _iter += 1
        # Break the loop
        else:
            break

    print('   Processed ' + str(_iter) + ' frames!')

    vid.release()
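A sketch of how the cropped images collected under `Poses/` might later be loaded for training; the directory layout follows the code above, while the grayscale conversion and the 28x28 target size are assumptions:

import glob
import os

import cv2
import numpy as np

# Hypothetical loader; sizes and color handling are assumptions.
def load_pose_dataset(root='Poses/', size=(28, 28)):
    images, labels = [], []
    for pose_name in sorted(os.listdir(root)):
        for png in glob.glob(os.path.join(root, pose_name, '*', '*.png')):
            img = cv2.imread(png, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            images.append(cv2.resize(img, size, interpolation=cv2.INTER_AREA))
            labels.append(pose_name)
    return np.array(images), np.array(labels)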
Example no. 18
def worker(input_q, output_q, cap_params, frame_processed, poses):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph(
            "F:\Realtime_Hand_tracking\cnn\models\hand_poses_wGarbage_100.h5")
    except Exception as e:
        print(e)

    detection_centres_x = []
    detection_centres_y = []
    is_centres_filled = False
    detected = False
    index = 0
    detection_area = []

    start_flag = False
    flag_start = pause_time = 0
    sensitivity = gui_sensitivity
    area = centre_x = centre_y = 0
    detection = ""
    direction = ""
    while True:
        predicted_label = ""
        frame = input_q.get()
        if (frame is not None):
            frame_processed += 1
            boxes, scores = detector_utils.detect_objects(frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(cap_params['num_hands_detect'], cap_params['score_thresh'],
                                               scores, boxes, cap_params['im_width'], cap_params['im_height'], frame)

            # get bounding box
            if pause_time == 0:
                centre_x, centre_y, area = detector_utils.draw_box_on_image(cap_params['num_hands_detect'],
                                                                            cap_params["score_thresh"],
                                                                            scores, boxes, cap_params['im_width'],
                                                                            cap_params['im_height'],
                                                                            frame)

            if pause_time > 0:
                pause_time -= 1

            if is_centres_filled:
                # keep a sliding window of the most recent detections (drop the oldest)
                detection_centres_x = detection_centres_x[1:10]
                detection_centres_y = detection_centres_y[1:10]
                detection_area = detection_area[1:10]

                detection_centres_x.append(centre_x)
                detection_centres_y.append(centre_y)
                detection_area.append(area)
            else:
                detection_centres_x.append(centre_x)
                detection_centres_y.append(centre_y)
                detection_area.append(area)

            if pause_time == 0:
                index += 1

            if index >= sensitivity:
                index = 0
                is_centres_filled = True

            if index == 0:
                predicted_label = classify(res, model, classification_graph, session, poses)
                #print(predicted_label)

            if predicted_label == "Start" and flag_start == 0:
                #print("Start")
                detection = "Start tracking"
                start_flag = True
                flag_start = 1

            if detected:
                detection_centres_x = []
                detection_centres_y = []
                is_centres_filled = False
                index = 0
                detected = False
                detection_area = []
                frame_processed = 0
                pause_time = 30

            centres_x = detection_centres_x.copy()
            centres_y = detection_centres_y.copy()

            areas = detection_area.copy()

            centres_x = [v for v in centres_x if v]
            centres_y = [v for v in centres_y if v]

            areas = [a for a in areas if a]

            # angle_coordinate
            if len(centres_x) > 3 and is_centres_filled and len(centres_y) > 3 and len(areas) > 3 and start_flag :
                flag = 0
                dX = centres_x[-1] - centres_x[0]
                dY = centres_y[-1] - centres_y[0]

                if dX > 20 and dY > 20:
                    m = dY / dX
                    angle = math.degrees(math.atan(m))
                    if angle < 45:
                        flag = 1
                    elif angle > 45:
                        flag = 2

                if dX > 100 and (abs(dY) < 20 or flag == 1):
                    direction = "Right"
                    keyboard.press_and_release('right')
                    detected = True
                    #print(direction)

                elif -dX > 100 and (abs(dY) < 20 or flag == 1):
                    direction = "Left"
                    keyboard.press_and_release('left')
                    detected = True
                    #print(direction)


                elif dY > 50 and (abs(dX) < 10 or flag == 2):
                    direction = "Down"
                    detected = True
                    #print(direction)

                elif -dY > 50 and (abs(dX) < 10 or flag == 2):
                    direction = "Up"
                    detected = True
                    #print(direction)

                elif areas[-1] - 3000 > areas[0] and abs(dX) < 30 and abs(dY) < 20:
                    direction = "Zoom in"
                    detected = True
                    #print(direction)
                elif areas[-1] < areas[0] - 3000 and abs(dX) < 10 and abs(dY) < 20:
                    direction = "Zoom out"
                    detected = True
                    #print(direction)

        output_q.put((frame, direction,predicted_label))
    sess.close()
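The direction logic buried in the worker above is easier to see in isolation. Below is a minimal standalone sketch of the same heuristic (the function name infer_direction is invented for illustration; the 20/100/50/3000-pixel thresholds are copied from the snippet, and the gating on start_flag and the sliding-window fill is left out):

import math

def infer_direction(centres_x, centres_y, areas):
    """Infer a swipe/zoom gesture from a short history of hand-box centres and areas."""
    if min(len(centres_x), len(centres_y), len(areas)) < 4:
        return ""  # not enough samples yet

    dX = centres_x[-1] - centres_x[0]
    dY = centres_y[-1] - centres_y[0]

    # Rough angle test, as in the worker: flag 1 = mostly horizontal, flag 2 = mostly vertical
    flag = 0
    if dX > 20 and dY > 20:
        angle = math.degrees(math.atan(dY / dX))
        if angle < 45:
            flag = 1
        elif angle > 45:
            flag = 2

    if dX > 100 and (abs(dY) < 20 or flag == 1):
        return "Right"
    if -dX > 100 and (abs(dY) < 20 or flag == 1):
        return "Left"
    if dY > 50 and (abs(dX) < 10 or flag == 2):
        return "Down"
    if -dY > 50 and (abs(dX) < 10 or flag == 2):
        return "Up"
    if areas[-1] - 3000 > areas[0] and abs(dX) < 30 and abs(dY) < 20:
        return "Zoom in"
    if areas[-1] < areas[0] - 3000 and abs(dX) < 10 and abs(dY) < 20:
        return "Zoom out"
    return ""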
Esempio n. 19
0
        for rank, rect in enumerate(hands):
            detect_count += 1
            iou = calcIoU(rect, frame)
            if iou > max_iou:
                max_iou = iou
        if max_iou > iou_threshod:
            pass_count += 1
    endt = time.time()
    print(
        "No.{}: handled {} images in {:.2f}s, detected {} and passed {}."
        .format(int(50 * score_thresh), len(label_dict), (endt - start),
                detect_count, pass_count))
    return pass_count, detect_count, scan_hand


if __name__ == '__main__':
    to_csv = '/home/zgwu/HandImages/long_video/test_frames/images.csv'
    beyond_iou_threshod = []
    local_label_dict = read_test_csv_from(to_csv)
    local_detection_graph, local_sessd = detector_utils.load_inference_graph(
    )  # ssd to detect hands
    for i in range(50):
        score_th = float(i / 50)
        pass_cnt, detect_cnt, scan_hand = record_tracker(
            local_detection_graph, local_sessd, local_label_dict, score_th)
        beyond_iou_threshod.append(
            (pass_cnt / scan_hand, detect_cnt / scan_hand, scan_hand / 1404))
    for i in range(len(beyond_iou_threshod)):
        x, y, z = beyond_iou_threshod[i]
        print(x, y, z, sep=' ')
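The threshold sweep above relies on calcIoU, which is not part of the fragment. Assuming both arguments reduce to (left, top, right, bottom) boxes, a standard intersection-over-union helper in the same spirit would be:

def calc_iou(box_a, box_b):
    """Intersection-over-union of two (left, top, right, bottom) boxes."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0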
Esempio n. 20
0
def worker(input_q, output_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    prev_areas = []
    prev_toplefts = []
    frame_count = 0
    while True:
        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # Actual detection. The boxes variable contains the bounding box coordinates for the detected hands,
            # while scores contains the confidence for each of these boxes.
            # Hint: if len(boxes) > 1, you may assume you have found at least one hand (within your score threshold)

            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # draw bounding boxes
            detector_utils.draw_box_on_image(cap_params['num_hands_detect'],
                                             cap_params["score_thresh"],
                                             scores, boxes,
                                             cap_params['im_width'],
                                             cap_params['im_height'], frame)

            toplefts, bottomrights, areas = detector_utils.get_corners(
                cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'], cap_params['im_height'])

            if frame_count >= 2:
                if len(prev_areas) == 0 or len(areas) == 0:
                    pass
                else:
                    dist = {}
                    for current_index in range(len(toplefts)):
                        for prev_index in range(len(prev_toplefts)):
                            dist[point_distance(
                                toplefts[current_index],
                                prev_toplefts[prev_index])] = (current_index,
                                                               prev_index)
                    current_index, prev_index = dist[min(dist.keys())]
                    current_area = areas[current_index]
                    prev_area = prev_areas[prev_index]
                    if compare_areas(current_area, prev_area, 2):
                        print("current:", current_area)
                        print("prev:", prev_area)
                        splatters.append(
                            Splatter(toplefts[current_index],
                                     bottomrights[current_index]))
                        frame_count = 0
                    if len(toplefts) == 2 and len(prev_toplefts) == 2:
                        if compare_areas(areas[1 - current_index],
                                         prev_areas[1 - prev_index], 2):
                            print("current:", current_area)
                            print("prev:", prev_area)
                            splatters.append(
                                Splatter(toplefts[1 - current_index],
                                         bottomrights[1 - current_index]))
                            frame_count = 0
            else:
                frame_count += 1

            # for x in range(0, len(toplefts)):
            #     splatters.append(Splatter(toplefts[x], bottomrights[x]))

            for splotch in splatters:
                if splotch.opacity == 0:
                    splatters.remove(splotch)
                    continue

                roi = frame[splotch.topleft[1]:splotch.bottomright[1],
                            splotch.topleft[0]:splotch.bottomright[0]]
                background = roi.copy()
                overlap = roi.copy()
                background[splotch.outline[:, :, 3] != 0] = (0, 0, 0)
                overlap[splotch.outline[:, :, 3] == 0] = (0, 0, 0)
                overlap_area = cv2.addWeighted(overlap, 1 - splotch.opacity,
                                               splotch.outline[:, :, 0:3],
                                               splotch.opacity, 0)
                dst = cv2.add(overlap_area, background)
                frame[splotch.topleft[1]:splotch.bottomright[1],
                      splotch.topleft[0]:splotch.bottomright[0]] = dst
                splotch.fade()

            prev_areas = areas.copy()
            prev_toplefts = toplefts.copy()

            # add frame with splatters to queue (below)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
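Esempio n. 20 calls two helpers that are not shown, point_distance and compare_areas. Plausible minimal implementations, assuming the former is the Euclidean distance between (x, y) corners and the latter checks whether a box area grew or shrank by more than the given factor between frames, are sketched below:

import math

def point_distance(p1, p2):
    """Euclidean distance between two (x, y) points."""
    return math.hypot(p1[0] - p2[0], p1[1] - p2[1])

def compare_areas(current_area, prev_area, factor):
    """True if the box area changed by more than `factor` between frames."""
    if prev_area == 0 or current_area == 0:
        return False
    ratio = current_area / prev_area
    return ratio >= factor or ratio <= 1.0 / factor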
Esempio n. 21
0
from xlutils.copy import copy
import argparse
import xlrd
import numpy as np
from datetime import date
from utils import detector_utils as detector_utils

lst1 = []
lst2 = []
ap = argparse.ArgumentParser()
ap.add_argument(
    '-d',
    '--display',
    dest='display',
    type=int,
    default=1,
    help='Display the detected images using OpenCV. This reduces FPS')
args = vars(ap.parse_args())

detection_graph_m, sess_m = detector_utils.load_inference_graph(
    detector_utils.PATH_TO_CKPT_M)


def save_data(no_of_time_hand_detected, no_of_time_hand_crossed):
    try:
        today = date.today()
        today = str(today)
        # loc = (r'C:\Users\rahul.tripathi\Desktop\result.xls')

        rb = xlrd.open_workbook('result.xls')
        sheet = rb.sheet_by_index(0)
        sheet.cell_value(0, 0)

        # print(sheet.nrows)
        q = sheet.cell_value(sheet.nrows - 1, 1)
Esempio n. 22
0
def worker(input_q, output_q, cropped_output_q, inferences_q, landmark_ouput_q,
           cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        "landmarks/shape_predictor_68_face_landmarks.dat")

    print(">> loading keras model for worker")
    try:
        model = tf.keras.models.load_model('models/asl_char_model.h5',
                                           compile=False)
    except Exception as e:
        print(e)

    while True:
        frame = input_q.get()
        if (frame is not None):
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # get region of interest
            res = detector_utils.get_box_image(cap_params['num_hands_detect'],
                                               cap_params["score_thresh"],
                                               scores, boxes,
                                               cap_params['im_width'],
                                               cap_params['im_height'], frame)

            # draw bounding boxes
            detector_utils.draw_box_on_image(cap_params['num_hands_detect'],
                                             cap_params["score_thresh"],
                                             scores, boxes,
                                             cap_params['im_width'],
                                             cap_params['im_height'], frame)

            # classify hand
            if res is not None:
                class_res = ""
                try:
                    proba = model.predict(process_image(res))[0]
                    mx = np.argmax(proba)

                    score = proba[mx] * 100
                    sequence = categories[mx][1]
                    class_res = str(score) + "/" + sequence
                except:
                    score = 0.0
                    sequence = ""
                    class_res = "empty"

                inferences_q.put(class_res)

            image_np1 = imutils.resize(frame, width=400)
            gray = cv2.cvtColor(image_np1, cv2.COLOR_BGR2GRAY)

            # facial landmarking

            # detect faces in the grayscale frame
            rects = detector(gray, 0)
            # loop over the face detections
            for rect in rects:
                # determine the facial landmarks for the face region, then
                # convert the facial landmark (x, y)-coordinates to a NumPy
                # array
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)

                # loop over the (x, y)-coordinates for the facial landmarks
                # and draw them on the image
                for (x, y) in shape:
                    cv2.circle(image_np1, (x, y), 1, (0, 0, 255), -1)

            # add frame annotated with bounding box to queue
            landmark_ouput_q.put(image_np1)
            cropped_output_q.put(res)
            output_q.put(frame)

            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
Esempio n. 23
0
def worker(input_q, output_q, cap_params, frame_processed, movement,
           movement_threshold):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)
    old_centers = [None] * cap_params["num_hands_detect"]
    centers = [None] * cap_params["num_hands_detect"]
    while True:
        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # Actual detection. The boxes variable contains the bounding box coordinates for the detected hands,
            # while scores contains the confidence for each of these boxes.
            # Hint: if len(boxes) > 1, you may assume you have found at least one hand (within your score threshold)

            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # draw bounding boxes
            if any(centers):
                # If both hands detected
                if all(old_centers) and all(centers):
                    moved1 = calculate_movement(old_centers[0], centers[0])
                    moved1 += calculate_movement(old_centers[1], centers[1])
                    moved1 = moved1 / 2

                    moved2 = calculate_movement(old_centers[1], centers[0])
                    moved2 += calculate_movement(old_centers[0], centers[1])
                    moved2 = moved2 / 2

                    moved = min(moved1, moved2)

                # Try to match hand movement to the closest previous hand position
                else:
                    moved1 = calculate_movement(old_centers[0], centers[0])

                    if old_centers[1]:
                        moved2 = calculate_movement(old_centers[1], centers[0])
                    else:
                        moved2 = 99999

                    if centers[1]:
                        moved3 = calculate_movement(old_centers[0], centers[1])
                    else:
                        moved3 = 99999

                    moved = min(moved1, moved2, moved3)

                if moved > movement_threshold:
                    # moved = 0
                    pass

                with movement.get_lock():
                    movement.value += int(moved)

                old_centers = centers

            centers = detector_utils.draw_box_on_image(
                cap_params["num_hands_detect"], cap_params["score_thresh"],
                scores, boxes, cap_params['im_width'], cap_params['im_height'],
                frame)

            # add frame annotated with bounding box to queue
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
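The movement counter above depends on calculate_movement, which is not included in the snippet. Assuming hand centres are (x, y) tuples, a minimal sketch is simply the Euclidean distance between the old and new centres:

import math

def calculate_movement(old_center, new_center):
    """Pixels a hand centre moved between two consecutive detections."""
    dx = new_center[0] - old_center[0]
    dy = new_center[1] - old_center[1]
    return math.sqrt(dx * dx + dy * dy)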
Esempio n. 24
0
    def run(self):
        detection_graph, sess = detector_utils.load_inference_graph()

        # parameters for loading data and images
        detection_model_path = 'haarcascade_frontalface_default.xml'
        emotion_model_path = 'fer2013_mini_XCEPTION.102-0.66.hdf5'
        emotion_labels = get_labels('fer2013')

        # hyper-parameters for bounding boxes shape
        frame_window = 10
        emotion_offsets = (20, 40)

        # loading models
        face_detection = load_detection_model(detection_model_path)
        emotion_classifier = load_model(emotion_model_path, compile=False)

        # getting input model shapes for inference
        emotion_target_size = emotion_classifier.input_shape[1:3]

        # starting lists for calculating modes
        emotion_window = []

        start_time = datetime.datetime.now()
        im_width, im_height = (400, 350)
        num_hands_detect = 2  # max number of hands we want to detect/track, can scale this up
        min_threshold = 0.2

        old_points = [None] * num_hands_detect
        cv2.namedWindow('Single-Threaded Detection', cv2.WINDOW_NORMAL)

        while True:
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            if self.image_queue.empty():
                continue
            img_data = base64.b64decode(str(self.image_queue.get()))
            image_np = np.asarray(Image.open(io.BytesIO(img_data)))
            image_np = cv2.flip(image_np, 1)
            gray_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

            faces = detect_faces(face_detection, gray_image)

            # Actual detection. The boxes variable contains the bounding box coordinates for the detected hands,
            # while scores contains the confidence for each of these boxes.
            # Hint: if len(boxes) > 1, you may assume you have found at least one hand (within your score threshold)

            boxes, scores = detector_utils.detect_objects(
                image_np, detection_graph, sess)

            valid_hands = 0
            calc_displacement = False
            for score in scores:
                if score > min_threshold:
                    valid_hands += 1
            if valid_hands == num_hands_detect:
                calc_displacement = True  #tell function to return new displacement

            # draw bounding boxes on frame
            self.total_displacement += detector_utils.draw_box_on_image(
                num_hands_detect, min_threshold, scores, boxes, im_width,
                im_height, image_np, old_points,
                calc_displacement)  #0.2 is the min threshold
            # Calculate Frames per second (FPS)
            self.num_frames += 1
            elapsed_time = (datetime.datetime.now() -
                            start_time).total_seconds()
            fps = self.num_frames / elapsed_time

            if self.current_emotion in self.emotions:
                self.emotions[self.current_emotion] += 1
            else:
                self.emotions[self.current_emotion] = 1

            print(self.total_displacement / (10 * self.num_frames),
                  self.current_emotion, self.emotions)

            for face_coordinates in faces:
                x1, x2, y1, y2 = apply_offsets(face_coordinates,
                                               emotion_offsets)
                gray_face = gray_image[y1:y2, x1:x2]
                try:
                    gray_face = cv2.resize(gray_face, (emotion_target_size))
                except:
                    continue

                gray_face = preprocess_input(gray_face, True)
                gray_face = np.expand_dims(gray_face, 0)
                gray_face = np.expand_dims(gray_face, -1)
                emotion_prediction = emotion_classifier.predict(gray_face)
                emotion_probability = np.max(emotion_prediction)
                emotion_label_arg = np.argmax(emotion_prediction)
                self.current_emotion = emotion_labels[emotion_label_arg]
                emotion_window.append(self.current_emotion)

                if len(emotion_window) > frame_window:
                    emotion_window.pop(0)
                try:
                    emotion_mode = mode(emotion_window)
                except:
                    continue

                draw_bounding_box(face_coordinates, image_np)

            # Display FPS on frame:
            detector_utils.draw_fps_on_image("FPS : " + str(int(fps)),
                                             image_np)

            cv2.imshow('Single-Threaded Detection',
                       cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))

            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break
Esempio n. 25
0
    def __init__(self):
        '''
        Parameters
        '''
        parser = argparse.ArgumentParser(
            description='TensorFlow Object Detection',
            usage='''Default parameters: None''')
        parser.add_argument('-g',
                            '--garlic',
                            action='store_true',
                            help='Sets up Network for Bin Dropper')
        parser.add_argument('-v',
                            '--vamp',
                            action='store_true',
                            help='Sets up Network for the Buoy')
        parser.add_argument('-s',
                            '--stake',
                            action='store_true',
                            help='Sets up Network for the Torpedo Board')
        args = parser.parse_args()

        self.tf_listener = tf.TransformListener()

        self.garlic = args.garlic
        self.vamp = args.vamp
        self.stake = args.stake

        if self.garlic:
            self.target = 'garlic'
            self.classes = 1
            self.see_sub = mil_tools.Image_Subscriber(
                topic="/camera/down/image_raw", callback=self.img_callback)
        elif self.vamp:
            self.target = 'vamp'
            self.classes = 4
            self.see_sub = mil_tools.Image_Subscriber(
                topic="/camera/front/left/image_raw",
                callback=self.img_callback)
        else:
            self.target = 'stake'
            self.classes = 1
            self.see_sub = mil_tools.Image_Subscriber(
                topic="/camera/front/left/image_raw",
                callback=self.img_callback)
        # Number of frames
        self.num_frames = rospy.get_param('~num_frames', 0)
        # Number of objects we detect
        self.num_objects_detect = rospy.get_param('~objects_detected', 1)
        # Minimum confidence score for the detections
        self.score_thresh = rospy.get_param('~score_thresh', 0.99)
        # If we want debug images published or not.
        self.debug = rospy.get_param('~debug', True)
        # Camera image width and height
        self.im_width = 1920
        self.im_height = 1080
        # Current time
        self.start_time = datetime.datetime.now()
        '''
        Misc Utils, Image Subscriber, and Service Call.
        '''
        # CV bridge for converting from rosmessage to cv_image
        self.bridge = CvBridge()

        self.inference_graph, self.sess = detector_utils.load_inference_graph(
            self.target, self.classes)

        # Subscribes to our image topic, allowing us to process the images
        # self.sub1 = rospy.Subscriber(
        #    self.camera_topic, Image, self.img_callback, queue_size=1)
        self.see_frame_counter = 0
        self.see_camera_info = self.see_sub.wait_for_camera_info()
        self.see_img_geom = image_geometry.PinholeCameraModel()
        self.see_img_geom.fromCameraInfo(self.see_camera_info)
        '''
        Publishers:
        debug_image_pub: publishes the images showing what tensorflow has
        identified as path markers
        bbox_pub: publishes the bounding boxes. 
        '''
        self.debug_image_pub = rospy.Publisher('path_debug',
                                               Image,
                                               queue_size=1)
        self.bbox_pub = rospy.Publisher('bbox_pub', Point, queue_size=1)
        self.roi_pub = rospy.Publisher('roi_pub',
                                       RegionOfInterest,
                                       queue_size=1)
        self.dictionary = {}
Esempio n. 26
0
def detector():

    ##
    # Game params
    #

    HEIGHT = 720
    paddle_left_y = 360
    paddle_right_y = 360
    PADDLE_HEIGHT = 150

    ##
    # Hand detector setup
    #
    score_thresh = 0.7
    fps = 10
    video_source = 0
    width = 640
    height = 480
    display = 1

    cap = cv2.VideoCapture(video_source)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    im_width, im_height = (cap.get(3), cap.get(4))

    detection_graph, sess = detector_utils.load_inference_graph()

    # max number of hands we want to detect/track
    num_hands_detect = 2

    wd = 640
    ht = 480

    cv2.namedWindow('Left Hand', cv2.WINDOW_NORMAL)
    cv2.namedWindow('Right Hand', cv2.WINDOW_NORMAL)
    while True:
        #         try:
        ret, image_np = cap.read()
        image_np = cv2.resize(image_np, (wd, ht))
        image_np = cv2.flip(image_np, 1)
        try:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        except:
            print("Error converting to RGB")

        width_cutoff = int(wd / 2)
        image_l = image_np[:, :width_cutoff]
        image_r = image_np[:, width_cutoff:]

        w, h = int(wd / 2), ht

        boxesl, scoresl = detector_utils.detect_objects(
            image_l, detection_graph, sess)
        detector_utils.draw_box_on_image(num_hands_detect, score_thresh,
                                         scoresl, boxesl, w, h, image_l)
        boxesr, scoresr = detector_utils.detect_objects(
            image_r, detection_graph, sess)
        detector_utils.draw_box_on_image(num_hands_detect, score_thresh,
                                         scoresr, boxesr, w, h, image_r)
        cv2.imshow('Left Hand', cv2.cvtColor(image_l, cv2.COLOR_RGB2BGR))
        cv2.imshow('Right Hand', cv2.cvtColor(image_r, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(25) & 0xFF == 27:
            cv2.destroyAllWindows()
            client.close()
            break
        for i in range(num_hands_detect):
            if (scoresl[i] > score_thresh):
                (left, right, top,
                 bottom) = (boxesl[i][1] * w, boxesl[i][3] * w,
                            boxesl[i][0] * h, boxesl[i][2] * h)

                #paddle_left_y = int(((top+bottom)/2)* (HEIGHT/ht))

                paddle_left_y = map_val(top, bottom)

                if paddle_left_y < 0:
                    paddle_left_y = 0

                if paddle_left_y > (HEIGHT - PADDLE_HEIGHT / 2):
                    paddle_left_y = HEIGHT - PADDLE_HEIGHT / 2
                try:
                    print('Sending Left paddle coordinates...')
                    send_paddle_coordinates('left', paddle_left_y)
                    print('Sent !')
                except:
                    pass

##                print(int((top+bottom)/2 * (HEIGHT/ht)))

            if (scoresr[i] > score_thresh):
                (left, right, top,
                 bottom) = (boxesr[i][1] * w, boxesr[i][3] * w,
                            boxesr[i][0] * h, boxesr[i][2] * h)

                #paddle_right_y = int(((top+bottom)/2)* (HEIGHT/ht))

                paddle_right_y = map_val(top, bottom)

                if paddle_right_y > (HEIGHT - PADDLE_HEIGHT / 2):
                    paddle_right_y = HEIGHT - PADDLE_HEIGHT / 2
                if paddle_right_y < 0:
                    paddle_right_y = 0
                try:
                    print('Sending Right paddle coordinates...')
                    send_paddle_coordinates('right', paddle_right_y)
                    print('Sent !')
                except:
                    pass
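The pong-style detector uses map_val and send_paddle_coordinates, neither of which appears in the snippet. Guided by the commented-out formula above it, map_val presumably maps the vertical midpoint of the hand box from camera space to the game's HEIGHT; a hedged sketch:

def map_val(top, bottom, frame_height=480, game_height=720):
    """Assumed helper: map the hand box's vertical midpoint to game coordinates."""
    midpoint = (top + bottom) / 2.0
    return int(midpoint * (game_height / frame_height))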
Esempio n. 27
0
def run_test(read_csv, logger):
    img_folder = '/home/zgwu/HandImages/long_video/test_frames/'
    save_folder = '/home/zgwu/HandImages/long_video/double_frames/'
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    label_dict = read_test_csv_from(read_csv)
    input_images = tf.placeholder(dtype=tf.float32,
                                  shape=[None, 224, 224, 3],
                                  name='input')
    # placeholder holds an input tensor for classification
    if ModelType == 'mobilenet':
        out, end_points = mobilenet_v2.mobilenet(input_tensor=input_images,
                                                 num_classes=num_classes,
                                                 depth_multiplier=1.0,
                                                 is_training=False)
    else:
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            out, end_points = inception_v3.inception_v3(
                inputs=input_images,
                num_classes=num_classes,
                is_training=False)
    detection_graph, sessd = detector_utils.load_inference_graph(
    )  # ssd to detect hands
    saver = tf.train.Saver()
    sess = tf.Session()
    saver.restore(sess, CKPT)
    CM = [[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0]]
    D = {
        'num_pictures': 0,
        'acc': 0,
        'num_hands': 0,
        'detect_t': 0.0,
        'classify_t': 0.0,
        'o': [0, 0, 0, 0, 0, 0, 0],
        'd': [0, 0, 0, 0, 0, 0, 0],
        'r': [0, 0, 0, 0, 0, 0, 0],
        'tp': [0, 0, 0, 0, 0, 0, 0]
    }
    tot_count = 0
    y_true, y_pred = [], []

    label_matrix = np.empty((0, num_classes), dtype=int)
    score_matrix = np.empty((0, num_classes), dtype=int)
    for num_img, (img_name, frame_label) in enumerate(label_dict.items()):
        tot_count += 1
        if tot_count % 100 == 1:
            print('Process {} --- {} / {}'.format(img_name, tot_count,
                                                  len(label_dict)))
        l, t, r, b, clazz = frame_label
        acc_index = labelsDict[clazz]
        D['o'][acc_index] += 1  # confusion matrix

        # filename = os.path.basename(img_name)
        name, ext = os.path.splitext(img_name)
        # print("Processing the image : " + name + " ... {}/{}".format(num_img+1, len(label_dict)))
        key = cv2.waitKey(5) & 0xff  ## Use Esc key to close the program
        if key == 27:
            break
        if key == ord('p'):
            cv2.waitKey(0)
        image_raw = cv2.imread(os.path.join(img_folder, img_name))
        image_np = cv2.resize(image_raw, (im_width, im_height))
        try:
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
        except:
            print("Error converting to RGB")
        start = time.time()  # set time
        boxes, scores = detector_utils.detect_objects(image_np,
                                                      detection_graph, sessd)
        hands = from_image_crop_boxes(num_hands_detect, score_thresh, scores,
                                      boxes, im_width, im_height, 0.5)
        endh = time.time()  # detect hand
        detect_t = endh - start
        D['detect_t'] += detect_t
        if not hands:
            continue
        else:
            D['num_pictures'] += 1
        D['d'][acc_index] += 1
        for rank, rect in enumerate(hands):
            D['num_hands'] += 1
            left, top, right, bottom = rect
            region = image_np[top:bottom, left:right]
            region = cv2.resize(region, (224, 224), cv2.INTER_AREA)
            # feed = np.expand_dims(region, axis=0)   # maybe it's a wrong format to feed
            img_data = tf.image.convert_image_dtype(
                np.array(region)[:, :, 0:3], dtype=tf.float32)  # RGB
            # elements are in [0,1)
            resized_img = tf.image.resize_images(img_data,
                                                 size=[224, 224],
                                                 method=0)
            # decode an image
            img = resized_img.eval(session=sess)
            img.resize([1, 224, 224, 3])
            # input an image array and inference to get predictions and set normal format
            predictions = end_points['Predictions'].eval(
                session=sess, feed_dict={input_images: img})

            label = np.zeros((1, num_classes), dtype=int)
            label[0, acc_index] = 1
            label_matrix = np.append(label_matrix, label, axis=0)
            score_matrix = np.append(score_matrix,
                                     predictions.reshape([1, num_classes]),
                                     axis=0)
            #print(label, predictions.reshape([1, num_classes]))

            predictions.resize([num_classes])
            np.set_printoptions(precision=4, suppress=True)
            index = int(np.argmax(predictions))
            y_true.append(acc_index)
            y_pred.append(index)
            D['r'][index] += 1
            msg = img_name + ' ' + clazz + ' ' + labelsStr[index]
            CM[acc_index][index] += 1

            if index == acc_index:
                D['acc'] += 1
                D['tp'][index] += 1

            logger.info(msg)
            if key == ord('s'):
                region = cv2.cvtColor(region, cv2.COLOR_RGB2BGR)
                cv2.imwrite(
                    frame_path + name + '_' + str(D['num_pictures']) + '_' +
                    str(rank) + '.jpg', region)
                cv2.waitKey(0)
        endr = time.time()
        classify_t = endr - endh
        D['classify_t'] += classify_t
    print(
        "From {} pictures, we detected {} hands with {} accurate predictions ({:.2f})"
        .format(tot_count, D['num_hands'], D['acc'],
                D['acc'] / D['num_hands']))
    result_log = '\n@@images_count: {} and detect_count: {}'.format(tot_count, D['num_pictures']) + \
                 '\n@@image_size: (width : {}, height: {})'.format(im_width, im_height) + \
                 '\n@@num_hand_detect: {} - {}%'.format(D['num_hands'], int(100 * D['num_hands'] / tot_count)) + \
                 '\n@@each_elapsed_time: (detect_hands: {:.4f}s, classify_hand: {:.4f}s)'.format(
                     D['detect_t'] / tot_count, D['classify_t'] / D['num_hands']) + \
                 '\n@@classify_result: Fist  Admire  Victory  Okay  None  Palm  Six' + \
                 '\n                   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- origin classes' \
                 '\n                   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- detect classes' \
                 '\n                   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- recognize count' \
                 '\n                   {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- true positive' \
                     .format(D['o'][0], D['o'][1], D['o'][2], D['o'][3],D['o'][4], D['o'][5], D['o'][6],
                     D['d'][0], D['d'][1], D['d'][2], D['d'][3],D['d'][4], D['d'][5], D['d'][6],
                     D['r'][0], D['r'][1], D['r'][2], D['r'][3], D['r'][4], D['r'][5], D['r'][6],
                     D['tp'][0], D['tp'][1], D['tp'][2], D['tp'][3], D['tp'][4], D['tp'][5], D['tp'][6]) + \
                 '\n@@accuracy: {}/{} - {}%'.format(D['acc'], D['num_hands'], int(100 * D['acc'] / D['num_hands'])) + \
                 '\n' + '-' * 100 + \
                 '\n' + str(CM)
    #print(result_log)
    logger.info(result_log)
    #print(classification_report(y_true, y_pred, target_names=labelsStr, digits=3))
    logger.info(
        str(
            classification_report(y_true,
                                  y_pred,
                                  target_names=labelsStr,
                                  digits=3)))

    print(label_matrix.shape, score_matrix.shape)
    # Compute the ROC for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    # Compute micro-average ROC curve and ROC area (method 2)
    fpr["micro"], tpr["micro"], _ = roc_curve(label_matrix.ravel(),
                                              score_matrix.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # FPR is the x-axis, TPR is the y-axis
    plt.plot(fpr["micro"],
             tpr["micro"],
             c='r',
             lw=2,
             alpha=0.7,
             label=u'AUC=%.3f' % roc_auc["micro"])
    plt.plot((0, 1), (0, 1), c='#808080', lw=1, ls='--', alpha=0.7)
    plt.xlim((-0.01, 1.02))
    plt.ylim((-0.01, 1.02))
    plt.xticks(np.arange(0, 1.1, 0.1))
    plt.yticks(np.arange(0, 1.1, 0.1))
    plt.xlabel('False Positive Rate', fontsize=13)
    plt.ylabel('True Positive Rate', fontsize=13)
    plt.grid(b=True, ls=':')
    plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
    plt.title(u'The ROC and AUC of MobileNet Classifier.', fontsize=17)
    plt.show()
    """
Esempio n. 28
0
def worker(input_q, output_q, cropped_output_q, inferences_q, pointX_q, pointY_q, cap_params, frame_processed):
    print(">> loading frozen model for worker")
    detection_graph, sess = detector_utils.load_inference_graph()
    sess = tf.Session(graph=detection_graph)

    print(">> loading keras model for worker")
    try:
        model, classification_graph, session = classifier.load_KerasGraph("cnn/models/hand_poses_wGarbage_10.h5")
    except Exception as e:
        print(e)
    while True:

        #print("> ===== in worker loop, frame ", frame_processed)
        frame = input_q.get()
        if (frame is not None):
            # Actual detection. The boxes variable contains the bounding box coordinates of the detected hands,
            # while scores contains the confidence for each of these boxes.
            # Hint: if len(boxes) > 1, you may assume at least one hand was found (within your score threshold).
            boxes, scores = detector_utils.detect_objects(
                frame, detection_graph, sess)

            # Get the region of interest
            # Get the image size of the frame
            cropped_height,cropped_width,a = frame.shape[:3]
            res = detector_utils.get_box_image(cap_params['num_hands_detect'], cap_params["score_thresh"],
                scores, boxes, cropped_width,cropped_height, frame)

            # When the hand detection score exceeds the threshold
            if (scores[0] > score_thresh):
                (left, right, top, bottom) = (boxes[0][1] * cropped_width, boxes[0][3] * cropped_width,
                                              boxes[0][0] * cropped_height, boxes[0][2] * cropped_height)

                # Get the screen size
                width,height = autopy.screen.size()

                # Set the screen-scaling factors
                # wx = (width + (int(right)-int(left))*(width / cap_params['im_width'])) / cap_params['im_width']
                #
                # hx = (height + (int(bottom)-int(top))*(height / cap_params['im_height'])) / cap_params['im_height']
                # wx = (width + (int(right)-int(left))*(width / cropped_width)) / (cropped_width-20)
                #
                # hx = (height + (int(bottom)-int(top))*(height / cropped_height)) / (cropped_height-20)
                # p1 = int(left)*wx
                # p2 = int(bottom)*hx-(int(bottom)*hx-int(top)*hx)

                hx = (height / cropped_height)*1.3
                wx = (width / cropped_width)*1.3

                # Pin the detected hand box to a fixed size
                hi = 100 - (int(bottom)-int(top))
                wi = 70 - (int(right)-int(left))



                top = top - hi

                left = left - wi

                fp = (int(left),int(top))
                ep = (int(right),int(bottom))

                p1 = int(left)*wx
                p2 = int(top)*wx
                if(abs(hi)>25 or abs(wi)>25):
                    cv2.rectangle(frame, fp, ep, (255, 0, 0), 1, 1)
                # Display the detected hand region
                else:
                    cv2.rectangle(frame, fp, ep, (77, 255, 9), 1, 1)
                # Push the computed coordinates (p1, p2) onto the queues
                pointX_q.put(p1)
                pointY_q.put(p2)
            # Classify the hand pose
            if res is not None:
                class_res = classifier.classify(model, classification_graph, session, res)
                inferences_q.put(class_res)

            # Add the frame annotated with the bounding box to the queue
            cropped_output_q.put(res)
            output_q.put(frame)
            frame_processed += 1
        else:
            output_q.put(frame)
    sess.close()
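The worker above only pushes the mapped screen coordinates into pointX_q and pointY_q; the consumer is not shown. One way the queues could be drained to drive the cursor with autopy (an assumption about the surrounding application, not the original code):

import queue

import autopy

def move_cursor_from_queues(pointX_q, pointY_q):
    """Move the mouse to the most recent point produced by the worker."""
    while True:
        try:
            x = pointX_q.get(timeout=1)
            y = pointY_q.get(timeout=1)
        except queue.Empty:
            continue
        width, height = autopy.screen.size()
        # Clamp so autopy does not raise on out-of-screen coordinates
        x = min(max(x, 0), width - 1)
        y = min(max(y, 0), height - 1)
        autopy.mouse.move(x, y)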
Esempio n. 29
0
def detect():
    global sess
    global label_lines
    global softmax_tensor
    global res
    global score
    global frame
    global fist
    global two
    global three
    global four
    global five
    global status
    global sfive
    global path
    color = (0, 255, 0)
    label_lines = [line.rstrip() for line
                   in tf.gfile.GFile("output_labels.txt")]

    # Unpersists graph from file
    with tf.gfile.FastGFile("output_graph.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')

    detection_graph, sessD = detector_utils.load_inference_graph()
    fist=0
    two=0
    three=0
    four=0
    five=0
    sfive=0
    status='command'
    path=''
    with tf.Session() as sess:
        score_thresh = 0.60
        softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
        # Get stream from webcam and set parameters)
        vs = VideoStream().start()

        # max number of hands we want to detect/track
        num_hands_detect = 1

        im_height, im_width = (None, None)
        try:
            while True:
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    vs.stop()
                    break
                # Read Frame and process
                frame = vs.read()
                frame = cv2.flip(frame, 1)
                frame = cv2.resize(frame, (640, 480))

                if im_height is None:
                    im_height, im_width = frame.shape[:2]

                # Convert image to RGB since OpenCV loads images in BGR; otherwise accuracy will decrease
                try:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                except:
                    print("Error converting to RGB")

                # Run image through tensorflow graph
                boxes, scores, classes = detector_utils.detect_objects(
                    frame, detection_graph, sessD)

                for i in range(num_hands_detect):
                    if scores[i] > score_thresh:
                        (left, right, top, bottom) = (boxes[i][1] * im_width, boxes[i][3] * im_width,
                                                      boxes[i][0] * im_height, boxes[i][2] * im_height)
                        height = bottom - top
                        width = right - left
                        img_cropped = frame[int(top):int(top + height), int(left):int(left + width)]
                        img_cropped = cv2.cvtColor(img_cropped, cv2.COLOR_RGB2BGR)
                        image_data = cv2.imencode('.jpg', img_cropped)[1].tostring()

                        res, score = predict(image_data)
                        cv2.putText(frame, '%s' % (res.upper()), (100,400), cv2.FONT_HERSHEY_SIMPLEX, 2, (255,255,255), 4)
                        #(frame,text,co-ordinates,fontype,font size,fontcolor,font boldness)
                        cv2.putText(frame, '(score = %.5f)' % (float(score)), (100,450), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255),1)
                      
                        if classes[i] == 1: hlabel = 'open'
                        if classes[i] == 2: hlabel='close'
                        cv2.putText(frame, hlabel+str("{0:.2f}".format(scores[i])), (int(left), int(top) - 5),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color,1)
                        if status=='command':
                            actions_invoke(res, score)        
                        else:
                            actions_perform(hlabel,path,res,score)
                    # Draw bounding boxes and text
                        detector_utils.draw_box_on_image(
                            num_hands_detect, score_thresh, scores, boxes, classes, im_width, im_height, frame)

                    cv2.imshow('Detection', cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

                               
        except KeyboardInterrupt:
            pass  # print("Average FPS: ", str("{0:.2f}".format(fps)))
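detect() calls predict(image_data), which is defined elsewhere in the original file. Given that the snippet loads output_graph.pb and output_labels.txt from the standard TensorFlow image-retraining example, a sketch of what predict presumably does (the DecodeJpeg/contents:0 feed name is an assumption carried over from that example):

def predict(image_data):
    """Run the retrained classifier on a JPEG-encoded crop and return (label, score)."""
    # sess, softmax_tensor and label_lines are the globals set up in detect()
    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})
    top = int(predictions[0].argmax())
    return label_lines[top], predictions[0][top]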
Esempio n. 30
0
    def __init__(self):
        '''
        Parameters
        '''
        # Timeout threshold in seconds
        self.time_thresh = rospy.get_param('~time_thresh', 1)
        # Centering Threshold in pixels
        self.centering_thresh = rospy.get_param('~centering_thresh', 50)
        # Color thresholds to find orange, BGR
        self.lower = rospy.get_param('~lower_color_threshold', [0, 0, 0])
        self.upper = rospy.get_param('~upper_color_threshold', [255, 255, 255])
        # Camera topic we are pulling images from for processing
        self.camera_topic = rospy.get_param('~camera_topic',
                                            '/camera/front/left/image_rect_color')
        # Number of frames
        self.num_frames = rospy.get_param('~num_frames', 0)
        # Number of objects we detect
        self.num_objects_detect = rospy.get_param('~objects_detected', 2)
        # Mininum confidence score for the detections
        self.score_thresh = rospy.get_param('~score_thresh', 0.1)
        # If we want debug images published or not.
        self.debug = rospy.get_param('~debug', True)
        # Camera image width and height
        self.im_width = rospy.get_param('~im_width', 720)
        self.im_height = rospy.get_param('~im_height', 480)
        # Current time
        self.start_time = datetime.datetime.now()
        # Midpoint of the overall image, in pixels
        self.midpoint = [self.im_width / 2, self.im_height / 2]
        # Whether or not we have centered on an object.
        self.centered = False
        '''
        Misc Utils, Image Subscriber, and Service Call.
        '''
        # CV bridge for converting from rosmessage to cv_image
        self.bridge = CvBridge()

        self.inference_graph, self.sess = detector_utils.load_inference_graph()

        # Subscribes to our image topic, allowing us to process the images
        self.sub1 = rospy.Subscriber(
            self.camera_topic, Image, self.img_callback, queue_size=1)
        '''
        Publishers:
        debug_image_pub: publishes the images showing what tensorflow has
        identified as path markers

        orange_image_pub: publishes our masked image, showing what we see to
        be orange.

        direction_pub: publishes the proposed direction of the path marker.
        If our mission script reads this and confirms it is a marker,it turns
        following this direction. Either 'left', 'right', or 'none.'

        orange_detection: ensures we are centering on an orange group of pixels

        path_roi: publishes the region of interest and its coordinates.
        This is used for debugging purposes.
        '''
        self.debug_image_pub = rospy.Publisher(
            'path_debug', Image, queue_size=1)
        self.orange_image_pub = rospy.Publisher(
            'orange_debug', Image, queue_size=1)
        self.direction_pub = rospy.Publisher(
            'path_direction', String, queue_size=1)
        self.orange_detection = rospy.Publisher(
            'path_orange', String, queue_size=1)
Esempio n. 31
0
import roslib
roslib.load_manifest('easy_markers')
import rospy
from easy_markers.generator import *
from visualization_msgs.msg import Marker
from utils import detector_utils as detector_utils
import cv2
import tensorflow as tf
import datetime
import argparse
# from utils.detector_utils import WebcamVideoStream
import pyrealsense2 as rs
import numpy as np
import matplotlib.pyplot as plt
# cap = cv2.VideoCapture(0)
detection_graph, sess = detector_utils.load_inference_graph()

if __name__ == '__main__':

    rospy.init_node('hand_tracking')
    pub = rospy.Publisher('/hand_mark', Marker)
    gen = MarkerGenerator()
    rate = rospy.Rate(2)
    gen.type = Marker.SPHERE_LIST
    gen.scale = [.03] * 3
    gen.frame_id = '/camera_color_optical_frame'
    # count = 0
    parser = argparse.ArgumentParser()
    parser.add_argument('-sth',
                        '--scorethreshold',
                        dest='score_thresh',
Esempio n. 32
0
    def __init__(self):
        '''
        Parameters
        '''
        # Timeout threshold in seconds
        self.time_thresh = rospy.get_param('~time_thresh', 1)
        # Centering Threshold in pixels
        self.centering_thresh = rospy.get_param('~centering_thresh', 50)
        # Color thresholds to find orange, BGR
        self.lower = rospy.get_param('~lower_color_threshold', [0, 30, 100])
        self.upper = rospy.get_param('~upper_color_threshold', [100, 255, 255])
        # Camera topic we are pulling images from for processing
        self.camera_topic = rospy.get_param('~camera_topic',
                                            '/camera/down/image_rect_color')
        # Number of frames
        self.num_frames = rospy.get_param('~num_frames', 0)
        # Number of objects we detect
        self.num_objects_detect = rospy.get_param('~objects_detected', 2)
        # Mininum confidence score for the detections
        self.score_thresh = rospy.get_param('~score_thresh', 0.1)
        # If we want debug images published or not.
        self.debug = rospy.get_param('~debug', True)
        # Camera image width and height
        self.im_width = rospy.get_param('~im_width', 720)
        self.im_height = rospy.get_param('~im_height', 480)
        # Current time
        self.start_time = datetime.datetime.now()
        # Midpoint of the overall image, in pixels
        self.midpoint = [self.im_width / 2, self.im_height / 2]
        # Whether or not we have centered on an object.
        self.centered = False
        # Whether or not we are enabled
        self.enabled = False
        '''
        Misc Utils, Image Subscriber, and Service Call.
        '''
        # CV bridge for converting from rosmessage to cv_image
        self.bridge = CvBridge()

        # Inference graph and session for tensorflow
        self.inference_graph, self.sess = detector_utils.load_inference_graph()

        # Service Call = the on/off switch for this perception file.
        rospy.Service('~enable', SetBool, self.toggle_search)

        # Subscribes to our image topic, allowing us to process the images
        self.sub1 = rospy.Subscriber(self.camera_topic,
                                     Image,
                                     self.img_callback,
                                     queue_size=1)
        '''
        Publishers:
        debug_image_pub: publishes the images showing what tensorflow has
        identified as path markers

        orange_image_pub: publishes our masked image, showing what we see to
        be orange.

        direction_pub: publishes the proposed direction of the path marker.
        If our mission script reads this and confirms it is a marker,it turns
        following this direction. Either 'left', 'right', or 'none.'

        orange_detection: ensures we are centering on an orange group of pixels

        path_roi: publishes the region of interest and its coordinates.
        This is used for debugging purposes.
        '''
        self.debug_image_pub = rospy.Publisher('path_debug',
                                               Image,
                                               queue_size=1)
        self.orange_image_pub = rospy.Publisher('orange_debug',
                                                Image,
                                                queue_size=1)
        self.direction_pub = rospy.Publisher('path_direction',
                                             String,
                                             queue_size=1)
        self.orange_detection = rospy.Publisher('path_orange',
                                                String,
                                                queue_size=1)