def save_person_information(name):
    saved_model = './ArcFace/model/068.pth'
    info_path = './users/' + name
    if not os.path.exists(info_path):
        os.makedirs(info_path)

    # threshold =  0.30896
    model = mobileFaceNet()
    model.load_state_dict(t.load(saved_model)['backbone_net_list'])
    model.eval()
    use_cuda = t.cuda.is_available() and True
    device = t.device("cuda" if use_cuda else "cpu")
    # is_cuda_available
    trans = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    model.to(device)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print('failed to open camera!')
    ret, frame = cap.read()
    while ret:
        frame = frame[:, :, ::-1]
        img = Image.fromarray(frame)
        bboxes, landmark = detect_faces(img)
        show_img = show_bboxes(img, bboxes, landmark)
        show_img = np.array(show_img)[:, :, ::-1]
        show_img = show_img.copy()
        cv2.putText(show_img, "press 'c' to crop your face", (0, 50),
                    cv2.FONT_HERSHEY_PLAIN, 2, [255, 0, 0], 2)
        cv2.imshow('img', show_img)  # 480 640 3
        # guard against pressing 'c' when no face was detected
        if cv2.waitKey(1) & 0xFF == ord('c') and len(bboxes) > 0:
            person_img = frame[int(bboxes[0, 1]):int(bboxes[0, 3]),
                               int(bboxes[0, 0]):int(bboxes[0, 2])]
            cv2.imshow('crop', person_img[:, :, ::-1])
            cv2.imwrite(os.path.join(info_path, '%s.jpg' % (name)),
                        person_img[:, :, ::-1])
            feature = np.squeeze(get_feature(person_img, model, trans, device))
            np.savetxt(os.path.join(info_path, '%s.txt' % (name)), feature)

        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
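
# These snippets are excerpts and mostly omit their imports. A minimal sketch of what
# they assume, based on the imports shown in Example #9 at the bottom of this page;
# the module path of the project helpers (get_feature, cosin_metric, draw_ch_zn) is
# an assumption and depends on the repository layout.
import os
import cv2
import numpy as np
import torch as t
from PIL import Image, ImageFont
from torchvision import transforms
from ArcFace.mobile_model import mobileFaceNet
from mtcnn.src import detect_faces, show_bboxes
# from utils import get_feature, cosin_metric, draw_ch_zn   # assumed helper module
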
Example #2
def detect():
    videoCapture = cv2.VideoCapture(args.input_path)
    fps = videoCapture.get(cv2.CAP_PROP_FPS)
    size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    videoWriter = cv2.VideoWriter(args.output_path,
                                  cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'),
                                  fps, size)
    success, img1 = videoCapture.read()
    img2 = Image.fromarray(img1)

    while success:
        bounding_boxes, _ = detect_faces(img2)
        if len(bounding_boxes) != 0:
            gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
            for i in range(0, len(bounding_boxes)):
                if bounding_boxes[i, 4] < 0.99:
                    continue
                x1, y1, x2, y2 = int(bounding_boxes[i, 0]), int(
                    bounding_boxes[i, 1]), int(bounding_boxes[i, 2]), int(
                        bounding_boxes[i, 3])
                img1 = cv2.rectangle(img1, (x1, y1), (x2, y2), (255, 0, 0), 2)
                roi_gray = gray[y1:y2, x1:x2]
                # print(bounding_boxes[i, 4])
                f = cv2.resize(roi_gray, (img_size, img_size))
                f = f.reshape(1, 1, img_size, img_size)
                f = Variable(torch.cuda.FloatTensor(f))
                output = net(f)
                _, label = torch.max(output.data, 1)
                label = label.cpu().numpy()
                if (label == 0):
                    cv2.putText(img1, 'Woman', (x1, y1 - 20),
                                cv2.FONT_HERSHEY_TRIPLEX, 1, 255, 2)
                elif (label == 1):
                    cv2.putText(img1, 'Man', (x1, y1 - 20),
                                cv2.FONT_HERSHEY_TRIPLEX, 1, 255, 2)
            videoWriter.write(img1)
            success, img1 = videoCapture.read()
            img2 = Image.fromarray(img1)
        else:
            videoWriter.write(img1)
            success, img1 = videoCapture.read()
            img2 = Image.fromarray(img1)
def main():
##########################################################################################################
    #preparation part
    confidence = float(0.25)
    nms_thesh = float(0.4)
    start = 0
    CUDA = torch.cuda.is_available()
    
    num_classes = 80
    
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    
    model.net_info["height"] =  "160"
    inp_dim = int(model.net_info["height"])
    
    assert inp_dim % 32 == 0                   # assert aborts the program if the following expression is false
    assert inp_dim > 32
    
    if CUDA:
        model.cuda()
   
    model.eval()

    #Kalman Filter
    tracker = Tracker(dist_thresh = 160, max_frames_to_skip = 100, 
                                        max_trace_length = 5, trackIdCount = 1)
    
    global confirm
    global person
    
    fps = 0.0
    count = 0
    frame = 0    
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0
    '''
    #record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output/testwrite_normal.avi',fourcc, 15.0, (640,480),True)
    '''
    cap = cv2.VideoCapture('test_video/test.avi')
    
    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []
    while True:
        start = time.time()  
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break

        img, orig_im, dim = prep_image(color_image, inp_dim)
        
        im_dim = torch.FloatTensor(dim).repeat(1,2)  
                
##################################################################################################
        #people detection part                
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        time_a = time.time()
        if count_yolo %3 ==0:
            output = model(Variable(img), CUDA)                         # feed the resized image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)         


            if type(output) == int:
                fps  = ( fps + (1./(time.time()-start)) ) / 2
                print("fps= %f"%(fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
        
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim                # clamp the boxes to the valid range
        
            #im_dim = im_dim.repeat(output.size(0), 1)
            output[:,[1,3]] *= color_image.shape[1]
            output[:,[2,4]] *= color_image.shape[0]
            output = output.cpu().numpy() 
            output = sellect_person(output)                                       # drop detections whose class is not 'person' to reduce computation
            output = np.array(output)

            output_update = output
        elif count_yolo %3 != 0:
            output = output_update
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))                # draw the detections on the original image
        #output[:, 1:5] holds the top-left and bottom-right corners of each box
        detect_time.append(time.time() - time_a)
###########################################################################################################
        #kalman filter tracking part
        time_a = time.time()
        output_kalman_xywh = to_xy(output)                   # convert output into the format expected by the Kalman filter
        if (len(output_kalman_xywh) > 0):
            tracker.Update(output_kalman_xywh)                # update the box positions with the Kalman filter
        
        outputs_kalman_normal = np.array(xy_to_normal(output,tracker.tracks)) # convert back to the original format
        # draw the tracked boxes
        for output_kalman_normal in outputs_kalman_normal:
            cv2.rectangle(orig_im, (int(output_kalman_normal[0]), int(output_kalman_normal[1])), 
                                        (int(output_kalman_normal[2]), int(output_kalman_normal[3])),(255,255,255), 2)
            cv2.putText(orig_im, str(output_kalman_normal[4]),(int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                                    0, 5e-3 * 200, (0,255,0),2)              # the track id is just a number
        kalman_time.append(time.time() - time_a)
#tracker.tracks[i].track_id
########################################################################################################
        #face recognition part
        time_a = time.time()
        if confirm == False:

            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users',i,'%s.txt'%(i)) for i in name_list]
            total_features = np.empty((128,),np.float32)

            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features,temp))
            total_features = total_features[1:]

            #threshold = 0.30896     # this threshold is not suitable, probably because of the gap between training and test data
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            #use_cuda = torch.cuda.is_available() and True
            #device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")

            # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112,112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)                                                                  # first detect the faces

            if len(bboxes) == 0:
                print('no face detected')
            else:
                for bbox in bboxes:
                    print(bbox[:4])
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()              # crop the face region from the frame
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))                               # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if  cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]  
                    # draw the box and the name here
                    orig_im = draw_ch_zn(orig_im,person,font,loc_x_y)                                                                    # add the name
                    cv2.rectangle(orig_im,(int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255))           # add the box
            #cv2.imshow("frame", orig_im)

############################################################################################################
            #confirmpart
            print('confirmation rate: {} %'.format(count*10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count*10), (10,30),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
            if len(bboxes)!=0 and len(output)!=0:
                if bboxes[0,0]>output[0,1] and bboxes[0,1]>output[0,2] and bboxes[0,2]<output[0,3] and bboxes[0,3]<output[0,4] and person:
                    count+=1
                frame+=1
            if count>=10 and frame<=30:
                confirm = True
                print('confirmed: the face belongs to that person')
            elif  frame >= 30:
                print('confirmation failed, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'confirmation failed, starting again', (10,60),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
        recogn_time.append(time.time() - time_a)             

###############################################################################################################
        time_a = time.time()
        #show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed', (output[0,1].astype(np.int32)+100,output[0,2].astype(np.int32)+20),
                                     cv2.FONT_HERSHEY_PLAIN, 2, [0,0,255], 2)
        if confirm:
            for output_kalman_normal in outputs_kalman_normal:
                if output_kalman_normal[4] == 1:
                    cv2.putText(orig_im, person, (output_kalman_normal[0].astype(np.int32)+100,output_kalman_normal[1].astype(np.int32)+20),
                                            cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
                    
                    #dist_info = get_dist_info(depth_image,bbox)                   #深度信息z
                    
                    #orig_im = add_dist_info(orig_im,bbox,dist_info)



        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break

        aux_time.append(time.time()-time_a)
        fps  = ( fps + (1./(time.time()-start)) ) / 2
        print("fps= %f"%(fps))
    
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1/(avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
Example #4
net = faceNet.faceNet_BN(classnum=10576, m=opt.marginFactor)
state_dict = torch.load('./cosFace/checkpoint/netFinal_8.pth')
net.load_state_dict(state_dict)
net = net.cuda(0)

video_capture = cv2.VideoCapture(0)

while True:
    _, frame = video_capture.read()
    frame = cv2.cvtColor(
        frame, cv2.COLOR_BGR2RGB
    )  #the model was indeed trained on BGR but all my alignment functions reverse it. So deal with it
    frame = Image.fromarray(
        np.uint8(frame)
    )  #since the entire library was written to work with PIL images (sorry)
    bounding_boxes, landmarks = detect_faces(frame, live_inference=True)
    frame = np.ascontiguousarray(
        frame
    )  #since we changed it to a PIL image, so change back to [H, W, 3]

    for box_idx, box in enumerate(bounding_boxes):
        cropped_face = frame[int(box[1]):int(
            box[3]
        ), int(box[0]):int(
            box[2]
        ), :]  #maybe add a +/- 10 pixels here in case the bounding boxes are too strict
        aligned_face = alignment(
            cropped_face, landmarks[box_idx]
        )  #crop and align the face to the preset landmark locations
        aligned_face = aligned_face.reshape((1, 3, 112, 96))
def verification():
    saved_model = './ArcFace/model/068.pth'
    name_list = os.listdir('./users')
    path_list = [os.path.join('./users', i, '%s.txt' % (i)) for i in name_list]
    total_features = np.empty((128, ), np.float32)
    people_num = len(path_list)

    font = ImageFont.truetype('simhei.ttf', 20, encoding='utf-8')

    if people_num > 1:
        are = 'are'
        people = 'people'
    else:
        are = 'is'
        people = 'person'
    print('start restoring user information; there %s %d %s on record' %
          (are, people_num, people))
    for i in path_list:
        temp = np.loadtxt(i)
        total_features = np.vstack((total_features, temp))
    total_features = total_features[1:]

    # threshold = 0.30896     # this threshold is not suitable, probably because of the gap between training and test data
    threshold = 0.5
    model = mobileFaceNet()
    model.load_state_dict(t.load(saved_model)['backbone_net_list'])
    model.eval()
    use_cuda = t.cuda.is_available() and True
    device = t.device("cuda" if use_cuda else "cpu")

    # is_cuda_available
    trans = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    model.to(device)

    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print('failed to open camera!')
    ret, frame = cap.read()

    while ret:
        frame = frame[:, :, ::-1]
        img = Image.fromarray(frame)
        bboxes, landmark = detect_faces(img)
        # print(bbox)  # [[296.89171371 211.27569699 441.8924298  396.48678774   0.99999869]]

        if len(bboxes) == 0:
            cv2.imshow('img', frame[:, :, ::-1])
            # videoWriter.write(frame[:,:,::-1])
            cv2.waitKey(10)
            ret, frame = cap.read()
            continue

        show_img = frame.copy()
        for bbox in bboxes:
            loc_x_y = [bbox[2], bbox[1]]
            person_img = frame[int(bbox[1]):int(bbox[3]),
                               int(bbox[0]):int(bbox[2])].copy()
            feature = np.squeeze(get_feature(person_img, model, trans, device))
            cos_distance = cosin_metric(total_features, feature)
            index = np.argmax(cos_distance)
            if cos_distance[index] <= threshold:
                ret, frame = cap.read()
                continue
            person = name_list[index]
            show_img = draw_ch_zn(show_img, person, font, loc_x_y)
            cv2.rectangle(show_img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 0, 255))

        cv2.imshow('img', show_img[:, :, ::-1])

        if cv2.waitKey(10) & 0xFF == ord('q'):
            # videoWriter.release()
            break

        ret, frame = cap.read()
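
# 'cosin_metric' compares one query embedding against the stacked user features; a
# minimal sketch assuming row-wise cosine similarity (the callers above take
# np.argmax over its result and compare the best score against the threshold):
def cosin_metric(x1, x2):
    # cosine similarity between each row of x1 (N x D) and the query vector x2 (D,)
    return np.dot(x1, x2) / (np.linalg.norm(x1, axis=1) * np.linalg.norm(x2))
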
def main():
##########################################################################################################
    #preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    
    num_classes = 80
    
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    
    assert inp_dim % 32 == 0                   # assert aborts the program if the following expression is false
    assert inp_dim > 32

    if CUDA:
        model.cuda()
            
    model.eval()
    
    global confirm
    global person
    
    fps = 0.0
    count = 0
    frame = 0    
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1) 
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    #record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    #out = cv2.VideoWriter('output/testwrite_normal.avi',fourcc, 15.0, (640,480),True)

    cap = cv2.VideoCapture(0)

    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []
    while True:
        start = time.time()  
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break
        img, orig_im, dim = prep_image(color_image, inp_dim)
        
        im_dim = torch.FloatTensor(dim).repeat(1,2)             
##########################################################################################################
        #people detection part                
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        time_a = time.time()
        if count_yolo %3 == 0:                                                               #detect people every 3 frames
            output = model(Variable(img), CUDA)                         # feed the resized image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)         


            if type(output) == int:
                fps  = ( fps + (1./(time.time()-start)) ) / 2
                print("fps= %f"%(fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
        
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim                # clamp the boxes to the valid range
        
            #im_dim = im_dim.repeat(output.size(0), 1)
            output[:,[1,3]] *= color_image.shape[1]
            output[:,[2,4]] *= color_image.shape[0]
            output = output.cpu().numpy() 
            output = sellect_person(output)                                       # drop detections whose class is not 'person' to reduce computation
            output = np.array(output)
            output_update = output
        elif count_yolo %3 != 0:
            output = output_update
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))                # draw the detections on the original image
        #output[:, 1:5] holds the top-left and bottom-right corners of each box
        detect_time.append(time.time() - time_a)
##########################################################################################################
        time_a = time.time()
        #kalman filter part
        outputs_tlwh = to_tlwh(output)                             ## convert output into the format expected by the tracker
        features = encoder(orig_im,outputs_tlwh)
        detections = [Detection(output_tlwh, 1.0, feature) for output_tlwh, feature in zip(outputs_tlwh, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            box = track.to_tlbr()
            cv2.rectangle(orig_im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),(255,255,255), 2)
            cv2.putText(orig_im, str(track.track_id),(int(box[0]), int(box[1])),0, 5e-3 * 200, (0,255,0),2)  
        
        kalman_time.append(time.time() - time_a)
##########################################################################################################
        #face recognition part
        time_a = time.time()
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users',i,'%s.txt'%(i)) for i in name_list]
            total_features = np.empty((128,),np.float32)

            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features,temp))
            total_features = total_features[1:]

            #threshold = 0.30896     # this threshold is not suitable, probably because of the gap between training and test data
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            #use_cuda = torch.cuda.is_available() and True
            #device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")

            # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112,112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)                                                                  # first detect the faces

            if len(bboxes) == 0:
                print('no face detected')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()              # crop the face region from the frame
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))                               # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if  cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]  
                    # draw the box and the name here
                    orig_im = draw_ch_zn(orig_im,person,font,loc_x_y)                                                                    # add the name
                    cv2.rectangle(orig_im,(int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255))           # add the box
            #cv2.imshow("frame", orig_im)

##########################################################################################################
            #confirmpart
            print('confirmation rate: {} %'.format(count*10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count*10), (10,30),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
            if len(bboxes)!=0 and len(output)!=0:
                if bboxes[0,0]>output[0,1] and bboxes[0,1]>output[0,2] and bboxes[0,2]<output[0,3] and bboxes[0,3]<output[0,4] and person:
                    count+=1
            frame+=1
            if count>=10 and frame<=30:
                confirm = True
                print('confirmed: the face belongs to that person')
            elif  frame >= 30:
                print('confirmation failed, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'confirmation failed, starting again', (10,60),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
##########################################################################################################
        recogn_time.append(time.time() - time_a)
        time_a = time.time()
        #show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed', (output[0,1].astype(np.int32)+100,output[0,2].astype(np.int32)+20),
                                     cv2.FONT_HERSHEY_PLAIN, 2, [0,0,255], 2)
        # add the recognized name
        if confirm:  
            for track in tracker.tracks:
                bbox = track.to_tlbr()
                if track.track_id == 1:
                    cv2.putText(orig_im, person, (int(bbox[0])+100,int(bbox[1])+20),
                                            cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
                
                    #rate.sleep()
        cv2.imshow("frame", orig_im)
        #out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        
        aux_time.append(time.time()-time_a)
        fps  = ( fps + (1./(time.time()-start)) ) / 2
        print("fps= %f"%(fps))
    #calculate how long each part takes
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1/(avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
def main():
    ##########################################################################################################
    #preparation part

    with open('config/config.json', 'r') as f:
        cfg = json.load(f)

    confidence = float(0.25)
    nms_thesh = float(0.4)
    CUDA = torch.cuda.is_available()

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = "160"
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0  # assert aborts the program if the following expression is false
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    #Kalman Filter
    tracker = Tracker(dist_thresh=160,
                      max_frames_to_skip=100,
                      max_trace_length=5,
                      trackIdCount=1)

    saved_model = 'ArcFace/model/068.pth'
    name_list = os.listdir('users')
    path_list = [os.path.join('users', i, '%s.txt' % (i)) for i in name_list]
    total_features = np.empty((128, ), np.float32)

    for i in path_list:
        temp = np.loadtxt(i)
        total_features = np.vstack((total_features, temp))
    total_features = total_features[1:]

    # threshold = 0.30896     # this threshold is not suitable, probably because of the gap between training and test data
    threshold = 0.5
    model_facenet = mobileFaceNet()
    model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
    model_facenet.eval()
    use_cuda = torch.cuda.is_available() and True
    device = torch.device("cuda" if use_cuda else "cpu")
    # device = torch.device("cuda")

    # is_cuda_available
    trans = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    model_facenet.to(device)

    global person

    fps = 0.0
    count = 0
    frame = 0
    person = []
    count_yolo = 0
    '''
    #record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output/test.avi',fourcc, 15.0, (640,480),True)
    '''
    cap = cv2.VideoCapture('test_video/test.avi')

    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []
    while True:
        start = time.time()
        ret, color_image = cap.read()
        if color_image is None:
            break

        img, orig_im, dim = prep_image(color_image, inp_dim)

        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        ##################################################################################################
        #people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        time_a = time.time()

        output = model(Variable(img), CUDA)  # feed the resized image into the YOLO network to get detections
        output = write_results(output,
                               confidence,
                               num_classes,
                               nms=True,
                               nms_conf=nms_thesh)

        if type(output) == int:
            fps = (fps + (1. / (time.time() - start))) / 2
            print("fps= %f" % (fps))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                     float(inp_dim)) / inp_dim  # clamp the boxes to the valid range

        #im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= color_image.shape[1]
        output[:, [2, 4]] *= color_image.shape[0]
        output = output.cpu().numpy()
        output = sellect_person(output)  # drop detections whose class is not 'person' to reduce computation
        output = np.array(output)

        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))  # draw the detections on the original image
        #output[:, 1:5] holds the top-left and bottom-right corners of each box
        detect_time.append(time.time() - time_a)
        ###########################################################################################################
        # face recognition part
        time_a = time.time()
        for person_bbox in output:
            # crop the detected person from the frame; note that 'top'/'down' here are
            # the x coordinates and 'left'/'right' the y coordinates of the box
            top, left, down, right = [int(x) for x in person_bbox[1:5]]
            if left >= right or top >= down:
                continue
            person_img = color_image[left:right, top:down].copy()

            img = Image.fromarray(person_img)
            bboxes, landmark = detect_faces(img)  # first detect the faces

            if len(bboxes) == 0:
                print('no face detected')
            else:
                print('face detected!')
                for bbox in bboxes:
                    cv2.rectangle(orig_im,
                                  (int(bbox[0] + top), int(bbox[1] + left)),
                                  (int(bbox[2] + top), int(bbox[3] + left)),
                                  (0, 0, 255))  # add the box
                    loc_x_y = [bbox[2] + top, bbox[1] + left]
                    face_img = person_img[
                        int(bbox[1]):int(bbox[3]),
                        int(bbox[0]):int(bbox[2])].copy()  # crop the face from the person image
                    feature = np.squeeze(
                        get_feature(face_img, model_facenet, trans,
                                    device))  # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]
                    # add the name here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)  # add the name

        recogn_time.append(time.time() - time_a)
        print('recognition time: ', time.time() - time_a)
        ###############################################################################################################
        time_a = time.time()
        '''
        #show the final output result
        for output_kalman_normal in outputs_kalman_normal:
            if output_kalman_normal[4] == 1:
                cv2.putText(orig_im, person, (output_kalman_normal[0].astype(np.int32)+100,output_kalman_normal[1].astype(np.int32)+20),
                            cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
        '''

        #out.write(orig_im)
        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break

        aux_time.append(time.time() - time_a)
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % (fps))

    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(
        1 /
        (avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
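
# 'get_feature' embeds a cropped face with the MobileFaceNet backbone; a minimal
# sketch assuming an RGB ndarray crop and the 112x112 normalisation transform
# defined in the snippets above:
def get_feature(face_img, model, trans, device):
    # preprocess the crop, run it through the backbone, return the embedding as numpy
    tensor = trans(Image.fromarray(np.uint8(face_img))).unsqueeze(0).to(device)
    with torch.no_grad():
        feature = model(tensor)
    return feature.cpu().numpy()
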
Example #8
def main():
##########################################################################################################
    #preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    
    assert inp_dim % 32 == 0                   # assert aborts the program if the following expression is false
    assert inp_dim > 32
    
    if CUDA:
        model.cuda()
   
    model.eval()

    #Kalman Filter
    tracker = Tracker(dist_thresh = 160, max_frames_to_skip = 100, 
                                        max_trace_length = 5, trackIdCount = 1)
    
    global confirm
    global person
    
    fps = 0.0
    count = 0
    frame = 0    
    person = []
    confirm = False
    reconfirm = False
    count = 0                       # run detection every 3 frames to update the person positions; otherwise keep the previous positions

    #record the video
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output/output_kalman111.avi',fourcc, 18.0, (640,360),True)

    #cap = cv2.VideoCapture(0)

    while True:
        start = time.time()  
        align_to = rs.stream.color
        align = rs.align(align_to)
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)
        depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()
        #ret, color_image = cap.read()

        # spatial filter to smooth the depth image
        spatial = rs.spatial_filter()
        spatial.set_option(rs.option.filter_magnitude, 5)
        spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
        spatial.set_option(rs.option.filter_smooth_delta, 20)
        spatial.set_option(rs.option.holes_fill, 3)
        filtered_depth = spatial.process(depth_frame)
        
        # hole-filling filter
        hole_filling = rs.hole_filling_filter()
        hole_filling.set_option(rs.option.holes_fill,2)
        filled_depth = hole_filling.process(filtered_depth)      
        
        color_image = np.asanyarray(color_frame.get_data())
        depth_image = np.asanyarray(filled_depth.get_data())   
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)

        img, orig_im, dim = prep_image(color_image, inp_dim)
        
        im_dim = torch.FloatTensor(dim).repeat(1,2)  
                
##################################################################################################
        #people detection part                
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        
        if count %3 == 0:
            output = model(Variable(img), CUDA)                         # feed the resized image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)   

            
            if type(output) == int:
                fps  = ( fps + (1./(time.time()-start)) ) / 2
                print("fps= %f"%(fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
            
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim                # clamp the boxes to the valid range
            
            #im_dim = im_dim.repeat(output.size(0), 1)
            output[:,[1,3]] *= color_image.shape[1]
            output[:,[2,4]] *= color_image.shape[0]
            output = output.cpu().numpy() 
            output = sellect_person(output)                                       # drop detections whose class is not 'person' to reduce computation
            output = np.array(output)

            output_update = output
        elif count%3 !=0:
            output = output_update
        count +=1
        #list(map(lambda x: write(x, orig_im), output))                # draw the detections on the original image
        #output[:, 1:5] holds the top-left and bottom-right corners of each box
###########################################################################################################
        #kalman filter tracking part

        output_kalman_xywh = to_xy(output)                   # convert output into the format expected by the Kalman filter
        if (len(output_kalman_xywh) > 0):
            tracker.Update(output_kalman_xywh)                # update the box positions with the Kalman filter
        
        outputs_kalman_normal = np.array(xy_to_normal(output,tracker.tracks)) # convert back to the original format
        # draw the tracked boxes
        for output_kalman_normal in outputs_kalman_normal:
            cv2.rectangle(orig_im, (int(output_kalman_normal[0]), int(output_kalman_normal[1])), 
                                        (int(output_kalman_normal[2]), int(output_kalman_normal[3])),(255,255,255), 2)
            cv2.rectangle(depth_colormap, (int(output_kalman_normal[0]), int(output_kalman_normal[1])), 
                                        (int(output_kalman_normal[2]), int(output_kalman_normal[3])),(255,255,255), 2)
            cv2.putText(orig_im, str(output_kalman_normal[4]),(int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                                    0, 5e-3 * 200, (0,255,0),2)              # the track id is just a number

#tracker.tracks[i].track_id
########################################################################################################
        #face recognition part

        if confirm == False:

            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users',i,'%s.txt'%(i)) for i in name_list]
            total_features = np.empty((128,),np.float32)

            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features,temp))
            total_features = total_features[1:]

            #threshold = 0.30896     # this threshold is not suitable, probably because of the gap between training and test data
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            #use_cuda = torch.cuda.is_available() and True
            #device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")

            # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112,112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)                                                                  # first detect the faces

            if len(bboxes) == 0:
                print('no face detected')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()              # crop the face region from the frame
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))                               # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if  cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]  
                    # draw the box and the name here
                    orig_im = draw_ch_zn(orig_im,person,font,loc_x_y)                                                                    # add the name
                    cv2.rectangle(orig_im,(int(bbox[0]),int(bbox[1])),(int(bbox[2]),int(bbox[3])),(0,0,255))           # add the box
            #cv2.imshow("frame", orig_im)
############################################################################################################
            #confirmpart
            print('confirmation rate: {} %'.format(count*2.5))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count*2.5), (10,30),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
            if len(bboxes)!=0 and len(output)!=0:
                if bboxes[0,0]>output[0,1] and bboxes[0,1]>output[0,2] and bboxes[0,2]<output[0,3] and bboxes[0,3]<output[0,4] and person:
                    count+=1
                frame+=1
            if count>=40 and frame<=100:
                confirm = True
                print('confirmed: the face belongs to that person')
            elif  frame >= 100:
                print('confirmation failed, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'confirmation failed, starting again', (10,60),cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)

###############################################################################################################
        #show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed', (output[0,1].astype(np.int32)+100,output[0,2].astype(np.int32)+20),
                                     cv2.FONT_HERSHEY_PLAIN, 2, [0,0,255], 2)
        if confirm:
            for output_kalman_normal in outputs_kalman_normal:
                if output_kalman_normal[4] == 1:
                    cv2.putText(orig_im, person, (output_kalman_normal[0].astype(np.int32)+100,output_kalman_normal[1].astype(np.int32)+20),
                                            cv2.FONT_HERSHEY_PLAIN, 2, [0,255,0], 2)
                    dist_info = get_dist_info(depth_image,output_kalman_normal)
                    #orig_im = clip_rest(color_image,depth_image,dist_info)
                    #depth_colormap = add_dist_info(depth_colormap,bbox,dist_info)
                    orig_im = add_dist_info(orig_im,output_kalman_normal,dist_info)
        #images = np.hstack((orig_im, depth_colormap))
        cv2.imshow("result", orig_im)
        out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        
        fps  = ( fps + (1./(time.time()-start)) ) / 2
        print("fps= %f"%(fps))
Example #9
from ArcFace.mobile_model import mobileFaceNet
from mtcnn.src import detect_faces, show_bboxes
import torch as t
from PIL import Image
import numpy as np
import cv2
saved_model = './ArcFace/model/068.pth'
threshold = 0.30896
model = mobileFaceNet()
model.load_state_dict(t.load(saved_model)['backbone_net_list'])
model.eval()
# is_cuda_available

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print('failed to open camera!')
ret, frame = cap.read()
while ret:
    frame = frame[:, :, ::-1]
    img = Image.fromarray(frame)
    bboxes, landmark = detect_faces(img)
    show_img = show_bboxes(img, bboxes, landmark)
    show_img = np.array(show_img)[:, :, ::-1]
    cv2.imshow('img', show_img)
    cv2.waitKey(30)
    ret, frame = cap.read()