def save_person_information(name):
    saved_model = './ArcFace/model/068.pth'
    info_path = './users/' + name
    if not os.path.exists(info_path):
        os.makedirs(info_path)
    # threshold = 0.30896
    model = mobileFaceNet()
    model.load_state_dict(t.load(saved_model)['backbone_net_list'])
    model.eval()
    use_cuda = t.cuda.is_available() and True
    device = t.device("cuda" if use_cuda else "cpu")  # is_cuda_available
    trans = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    model.to(device)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print('failed to open camera!!!')
    ret, frame = cap.read()
    while ret:
        frame = frame[:, :, ::-1]  # BGR -> RGB
        img = Image.fromarray(frame)
        bboxes, landmark = detect_faces(img)
        show_img = show_bboxes(img, bboxes, landmark)
        show_img = np.array(show_img)[:, :, ::-1]  # RGB -> BGR for OpenCV display
        show_img = show_img.copy()
        cv2.putText(show_img, "press 'c' to crop your face", (0, 50),
                    cv2.FONT_HERSHEY_PLAIN, 2, [255, 0, 0], 2)
        cv2.imshow('img', show_img)  # 480 x 640 x 3
        if cv2.waitKey(1) & 0xFF == ord('c'):
            person_img = frame[int(bboxes[0, 1]):int(bboxes[0, 3]),
                               int(bboxes[0, 0]):int(bboxes[0, 2])]
            cv2.imshow('crop', person_img[:, :, ::-1])
            cv2.imwrite(os.path.join(info_path, '%s.jpg' % (name)), person_img[:, :, ::-1])
            feature = np.squeeze(get_feature(person_img, model, trans, device))
            np.savetxt(os.path.join(info_path, '%s.txt' % (name)), feature)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        ret, frame = cap.read()
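# `get_feature` is called above but not defined in this section. The following is a
# minimal sketch of the assumed behaviour (the name `get_feature_sketch` and the body
# are illustrative, not the project's actual implementation): preprocess the face crop
# with `trans` and run it through the embedding network to obtain a 128-d feature.
def get_feature_sketch(person_img, model, trans, device):
    img = Image.fromarray(person_img)                # HxWx3 numpy array -> PIL image
    tensor = trans(img).unsqueeze(0).to(device)      # 1x3x112x112 normalized tensor
    with t.no_grad():
        feature = model(tensor)                      # assumed to return a 1x128 embedding
    return feature.cpu().numpy()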
def detect():
    videoCapture = cv2.VideoCapture(args.input_path)
    fps = videoCapture.get(cv2.CAP_PROP_FPS)
    size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    videoWriter = cv2.VideoWriter(args.output_path,
                                  cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), fps, size)
    success, img1 = videoCapture.read()
    img2 = Image.fromarray(img1)
    while success:
        bounding_boxes, _ = detect_faces(img2)
        if len(bounding_boxes) != 0:
            gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
            for i in range(0, len(bounding_boxes)):
                if bounding_boxes[i, 4] < 0.99:
                    continue
                x1, y1, x2, y2 = int(bounding_boxes[i, 0]), int(bounding_boxes[i, 1]), \
                                 int(bounding_boxes[i, 2]), int(bounding_boxes[i, 3])
                img1 = cv2.rectangle(img1, (x1, y1), (x2, y2), (255, 0, 0), 2)
                roi_gray = gray[y1:y2, x1:x2]
                # print(bounding_boxes[i, 4])
                f = cv2.resize(roi_gray, (img_size, img_size))
                f = f.reshape(1, 1, img_size, img_size)
                f = Variable(torch.cuda.FloatTensor(f))
                output = net(f)
                _, label = torch.max(output.data, 1)
                label = label.cpu().numpy()
                if label == 0:
                    cv2.putText(img1, 'Woman', (x1, y1 - 20),
                                cv2.FONT_HERSHEY_TRIPLEX, 1, 255, 2)
                elif label == 1:
                    cv2.putText(img1, 'Man', (x1, y1 - 20),
                                cv2.FONT_HERSHEY_TRIPLEX, 1, 255, 2)
            videoWriter.write(img1)
            success, img1 = videoCapture.read()
            img2 = Image.fromarray(img1)
        else:
            videoWriter.write(img1)
            success, img1 = videoCapture.read()
            img2 = Image.fromarray(img1)
def main():
    ##########################################################################################################
    # preparation part
    confidence = float(0.25)
    nms_thesh = float(0.4)
    start = 0
    CUDA = torch.cuda.is_available()
    num_classes = 80

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = "160"
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0   # assert aborts the program when the condition is false
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()

    # Kalman Filter
    tracker = Tracker(dist_thresh=160, max_frames_to_skip=100,
                      max_trace_length=5, trackIdCount=1)

    global confirm
    global person
    fps = 0.0
    count = 0
    frame = 0
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0
    '''
    # record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output/testwrite_normal.avi', fourcc, 15.0, (640, 480), True)
    '''
    cap = cv2.VideoCapture('test_video/test.avi')
    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []

    while True:
        start = time.time()
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break
        img, orig_im, dim = prep_image(color_image, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        ##################################################################################################
        # people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        time_a = time.time()
        if count_yolo % 3 == 0:
            output = model(Variable(img), CUDA)   # feed the prepared image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)
            if type(output) == int:
                fps = (fps + (1. / (time.time() - start))) / 2
                print("fps= %f" % (fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim   # clamp the tensor to the valid range
            # im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= color_image.shape[1]
            output[:, [2, 4]] *= color_image.shape[0]
            output = output.cpu().numpy()
            output = sellect_person(output)   # drop detections whose label is not 'person' to reduce computation
            output = np.array(output)
            output_update = output
        elif count_yolo % 3 != 0:
            output = output_update
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))   # draw the detections on the original image
        # output[0, 1:5] holds the top-left and bottom-right corners of the box
        detect_time.append(time.time() - time_a)

        ###########################################################################################################
        # Kalman filter tracking part
        time_a = time.time()
        output_kalman_xywh = to_xy(output)   # convert output into the format expected by the Kalman filter update
        if len(output_kalman_xywh) > 0:
            tracker.Update(output_kalman_xywh)   # update the box positions with the Kalman filter
            outputs_kalman_normal = np.array(xy_to_normal(output, tracker.tracks))   # convert back to the original format
            # draw the boxes
            for output_kalman_normal in outputs_kalman_normal:
                cv2.rectangle(orig_im,
                              (int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                              (int(output_kalman_normal[2]), int(output_kalman_normal[3])),
                              (255, 255, 255), 2)
                cv2.putText(orig_im, str(output_kalman_normal[4]),
                            (int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                            0, 5e-3 * 200, (0, 255, 0), 2)   # the track id is just a number
        kalman_time.append(time.time() - time_a)
        # tracker.tracks[i].track_id

        ########################################################################################################
        # face recognition part
        time_a = time.time()
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users', i, '%s.txt' % (i)) for i in name_list]
            total_features = np.empty((128,), np.float32)
            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features, temp))
            total_features = total_features[1:]
            # threshold = 0.30896   # this threshold is not suitable, probably because of the gap between training and test data!!!
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            # use_cuda = torch.cuda.is_available() and True
            # device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")   # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112, 112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)   # detect the faces first
            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    print(bbox[:4])
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()   # crop the face region from the image
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))   # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]
                    # draw the box and name here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)   # add the name
                    cv2.rectangle(orig_im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255))   # add the box
            # cv2.imshow("frame", orig_im)

            ############################################################################################################
            # confirm part
            print('confirmation rate: {} %'.format(count * 10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count * 10), (10, 30),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
            if len(bboxes) != 0 and len(output) != 0:
                if bboxes[0, 0] > output[0, 1] and bboxes[0, 1] > output[0, 2] \
                        and bboxes[0, 2] < output[0, 3] and bboxes[0, 3] < output[0, 4] and person:
                    count += 1
            frame += 1
            if count >= 10 and frame <= 30:
                confirm = True
                print('confirmed that the face belongs to that person')
            elif frame >= 30:
                print('failed to confirm, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'failed to confirm, starting again', (10, 60),
                            cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
        recogn_time.append(time.time() - time_a)

        ###############################################################################################################
        time_a = time.time()
        # show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed',
                        (output[0, 1].astype(np.int32) + 100, output[0, 2].astype(np.int32) + 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
        if confirm:
            for output_kalman_normal in outputs_kalman_normal:
                if output_kalman_normal[4] == 1:
                    cv2.putText(orig_im, person,
                                (output_kalman_normal[0].astype(np.int32) + 100, output_kalman_normal[1].astype(np.int32) + 20),
                                cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
                    # dist_info = get_dist_info(depth_image, bbox)   # depth (z) information
                    # orig_im = add_dist_info(orig_im, bbox, dist_info)

        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        aux_time.append(time.time() - time_a)
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % (fps))

    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1 / (avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
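# `cosin_metric` is called in the recognition part above but not defined in this
# section. A minimal sketch of the assumed behaviour (illustrative name and body):
# cosine similarity between each stored user feature (rows of `total_features`,
# shape N x 128) and the query `feature` (shape 128,).
def cosin_metric_sketch(total_features, feature):
    num = np.dot(total_features, feature)                                    # (N,)
    denom = np.linalg.norm(total_features, axis=1) * np.linalg.norm(feature)
    return num / (denom + 1e-10)                                             # avoid division by zero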
net = faceNet.faceNet_BN(classnum=10576, m=opt.marginFactor)
state_dict = torch.load('./cosFace/checkpoint/netFinal_8.pth')
net.load_state_dict(state_dict)
net = net.cuda(0)

video_capture = cv2.VideoCapture(0)
while True:
    _, frame = video_capture.read()
    # the model was indeed trained on BGR, but all my alignment functions reverse it, so deal with it
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # the entire library was written to work with PIL images (sorry)
    frame = Image.fromarray(np.uint8(frame))
    bounding_boxes, landmarks = detect_faces(frame, live_inference=True)
    # we converted to a PIL image above, so change back to an [H, W, 3] array
    frame = np.ascontiguousarray(frame)
    for box_idx, box in enumerate(bounding_boxes):
        # maybe add a +/- 10 pixels here in case the bounding boxes are too strict
        cropped_face = frame[int(box[1]):int(box[3]), int(box[0]):int(box[2]), :]
        # crop and align the face to the preset landmark locations
        aligned_face = alignment(cropped_face, landmarks[box_idx])
        aligned_face = aligned_face.reshape((1, 3, 112, 96))
def verification():
    saved_model = './ArcFace/model/068.pth'
    name_list = os.listdir('./users')
    path_list = [os.path.join('./users', i, '%s.txt' % (i)) for i in name_list]
    total_features = np.empty((128,), np.float32)
    people_num = len(path_list)
    font = ImageFont.truetype('simhei.ttf', 20, encoding='utf-8')

    if people_num > 1:
        are = 'are'
        people = 'people'
    else:
        are = 'is'
        people = 'person'
    print('start restoring user information, there %s %d %s' % (are, people_num, people))

    for i in path_list:
        temp = np.loadtxt(i)
        total_features = np.vstack((total_features, temp))
    total_features = total_features[1:]

    # threshold = 0.30896   # this threshold is not suitable, probably because of the gap between training and test data!!!
    threshold = 0.5
    model = mobileFaceNet()
    model.load_state_dict(t.load(saved_model)['backbone_net_list'])
    model.eval()
    use_cuda = t.cuda.is_available() and True
    device = t.device("cuda" if use_cuda else "cpu")  # is_cuda_available
    trans = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    model.to(device)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print('failed to open camera!!!')
    ret, frame = cap.read()
    while ret:
        frame = frame[:, :, ::-1]  # BGR -> RGB
        img = Image.fromarray(frame)
        bboxes, landmark = detect_faces(img)
        # print(bbox)  # [[296.89171371 211.27569699 441.8924298 396.48678774 0.99999869]]
        if len(bboxes) == 0:
            cv2.imshow('img', frame[:, :, ::-1])
            # videoWriter.write(frame[:, :, ::-1])
            cv2.waitKey(10)
            ret, frame = cap.read()
            continue
        show_img = frame.copy()
        for bbox in bboxes:
            loc_x_y = [bbox[2], bbox[1]]
            person_img = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()
            feature = np.squeeze(get_feature(person_img, model, trans, device))
            cos_distance = cosin_metric(total_features, feature)
            index = np.argmax(cos_distance)
            if not cos_distance[index] > threshold:
                ret, frame = cap.read()
                continue
            person = name_list[index]
            show_img = draw_ch_zn(show_img, person, font, loc_x_y)
            cv2.rectangle(show_img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255))
        cv2.imshow('img', show_img[:, :, ::-1])
        if cv2.waitKey(10) & 0xFF == ord('q'):
            # videoWriter.release()
            break
        ret, frame = cap.read()
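# `draw_ch_zn` is used above to draw a (possibly Chinese) name on the frame. cv2.putText
# cannot render Chinese glyphs, so the assumed approach is to draw with a PIL TrueType
# font (e.g. the 'simhei.ttf' loaded above). An illustrative sketch, not the project's code:
from PIL import ImageDraw

def draw_ch_zn_sketch(img, text, font, loc_x_y):
    pil_img = Image.fromarray(img)
    draw = ImageDraw.Draw(pil_img)
    draw.text((int(loc_x_y[0]), int(loc_x_y[1])), text, font=font, fill=(255, 0, 0))
    return np.array(pil_img)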
def main():
    ##########################################################################################################
    # preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    num_classes = 80

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0   # assert aborts the program when the condition is false
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()

    global confirm
    global person
    fps = 0.0
    count = 0
    frame = 0
    person = []
    confirm = False
    reconfirm = False
    count_yolo = 0

    # deep_sort tracker
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # out = cv2.VideoWriter('output/testwrite_normal.avi', fourcc, 15.0, (640, 480), True)

    cap = cv2.VideoCapture(0)
    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []

    while True:
        start = time.time()
        ret, color_image = cap.read()
        '''
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        '''
        if color_image is None:
            break
        img, orig_im, dim = prep_image(color_image, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        ##########################################################################################################
        # people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        time_a = time.time()
        if count_yolo % 3 == 0:   # detect people every 3 frames
            output = model(Variable(img), CUDA)   # feed the prepared image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)
            if type(output) == int:
                fps = (fps + (1. / (time.time() - start))) / 2
                print("fps= %f" % (fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim   # clamp the tensor to the valid range
            # im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= color_image.shape[1]
            output[:, [2, 4]] *= color_image.shape[0]
            output = output.cpu().numpy()
            output = sellect_person(output)   # drop detections whose label is not 'person' to reduce computation
            output = np.array(output)
            output_update = output
        elif count_yolo % 3 != 0:
            output = output_update
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))   # draw the detections on the original image
        # output[0, 1:5] holds the top-left and bottom-right corners of the box
        detect_time.append(time.time() - time_a)

        ##########################################################################################################
        # deep_sort tracking part
        time_a = time.time()
        outputs_tlwh = to_tlwh(output)   # convert output into the format expected by the tracker update
        features = encoder(orig_im, outputs_tlwh)
        detections = [Detection(output_tlwh, 1.0, feature)
                      for output_tlwh, feature in zip(outputs_tlwh, features)]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlbr()
            cv2.rectangle(orig_im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 255, 255), 2)
            cv2.putText(orig_im, str(track.track_id), (int(box[0]), int(box[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
        kalman_time.append(time.time() - time_a)

        ##########################################################################################################
        # face recognition part
        time_a = time.time()
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users', i, '%s.txt' % (i)) for i in name_list]
            total_features = np.empty((128,), np.float32)
            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features, temp))
            total_features = total_features[1:]
            # threshold = 0.30896   # this threshold is not suitable, probably because of the gap between training and test data!!!
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            # use_cuda = torch.cuda.is_available() and True
            # device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")   # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112, 112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)   # detect the faces first
            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()   # crop the face region from the image
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))   # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]
                    # draw the box and name here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)   # add the name
                    cv2.rectangle(orig_im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255))   # add the box
            # cv2.imshow("frame", orig_im)

            ##########################################################################################################
            # confirm part
            print('confirmation rate: {} %'.format(count * 10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count * 10), (10, 30),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
            if len(bboxes) != 0 and len(output) != 0:
                if bboxes[0, 0] > output[0, 1] and bboxes[0, 1] > output[0, 2] \
                        and bboxes[0, 2] < output[0, 3] and bboxes[0, 3] < output[0, 4] and person:
                    count += 1
            frame += 1
            if count >= 10 and frame <= 30:
                confirm = True
                print('confirmed that the face belongs to that person')
            elif frame >= 30:
                print('failed to confirm, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'failed to confirm, starting again', (10, 60),
                            cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)

        ##########################################################################################################
        recogn_time.append(time.time() - time_a)
        time_a = time.time()
        # show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed',
                        (output[0, 1].astype(np.int32) + 100, output[0, 2].astype(np.int32) + 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
        # add the recognised name to the tracked box
        if confirm:
            for track in tracker.tracks:
                bbox = track.to_tlbr()
                if track.track_id == 1:
                    cv2.putText(orig_im, person, (int(bbox[0]) + 100, int(bbox[1]) + 20),
                                cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
        # rate.sleep()

        cv2.imshow("frame", orig_im)
        # out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        aux_time.append(time.time() - time_a)
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % (fps))

    # calculate how long each part takes
    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(1 / (avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
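# `to_tlwh` is called in the tracking part above but not defined in this section. A
# minimal sketch of the assumed conversion (illustrative only): YOLO output rows of the
# form [batch_idx, x1, y1, x2, y2, ...] become (top-left-x, top-left-y, width, height)
# boxes as expected by the deep_sort `Detection` objects.
def to_tlwh_sketch(output):
    boxes = []
    for row in output:
        x1, y1, x2, y2 = row[1:5]
        boxes.append([x1, y1, x2 - x1, y2 - y1])
    return np.array(boxes)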
def main():
    ##########################################################################################################
    # preparation part
    with open('config/config.json', 'r') as f:
        cfg = json.load(f)
    confidence = float(0.25)
    nms_thesh = float(0.4)
    CUDA = torch.cuda.is_available()

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = "160"
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0   # assert aborts the program when the condition is false
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()

    # Kalman Filter
    tracker = Tracker(dist_thresh=160, max_frames_to_skip=100,
                      max_trace_length=5, trackIdCount=1)

    saved_model = 'ArcFace/model/068.pth'
    name_list = os.listdir('users')
    path_list = [os.path.join('users', i, '%s.txt' % (i)) for i in name_list]
    total_features = np.empty((128,), np.float32)
    for i in path_list:
        temp = np.loadtxt(i)
        total_features = np.vstack((total_features, temp))
    total_features = total_features[1:]

    # threshold = 0.30896   # this threshold is not suitable, probably because of the gap between training and test data!!!
    threshold = 0.5
    model_facenet = mobileFaceNet()
    model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
    model_facenet.eval()
    use_cuda = torch.cuda.is_available() and True
    device = torch.device("cuda" if use_cuda else "cpu")
    # device = torch.device("cuda")   # is_cuda_available
    trans = transforms.Compose([
        transforms.Resize((112, 112)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    model_facenet.to(device)

    global person
    fps = 0.0
    count = 0
    frame = 0
    person = []
    count_yolo = 0
    '''
    # record the video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output/test.avi', fourcc, 15.0, (640, 480), True)
    '''
    cap = cv2.VideoCapture('test_video/test.avi')
    detect_time = []
    recogn_time = []
    kalman_time = []
    aux_time = []

    while True:
        start = time.time()
        ret, color_image = cap.read()
        if color_image is None:
            break
        img, orig_im, dim = prep_image(color_image, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        ##################################################################################################
        # people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        time_a = time.time()
        output = model(Variable(img), CUDA)   # feed the prepared image into the YOLO network to get detections
        output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)
        if type(output) == int:
            fps = (fps + (1. / (time.time() - start))) / 2
            print("fps= %f" % (fps))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue
        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim   # clamp the tensor to the valid range
        # im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= color_image.shape[1]
        output[:, [2, 4]] *= color_image.shape[0]
        output = output.cpu().numpy()
        output = sellect_person(output)   # drop detections whose label is not 'person' to reduce computation
        output = np.array(output)
        count_yolo += 1
        list(map(lambda x: write(x, orig_im), output))   # draw the detections on the original image
        # output[0, 1:5] holds the top-left and bottom-right corners of the box
        detect_time.append(time.time() - time_a)

        ###########################################################################################################
        # face recognition part
        time_a = time.time()
        for person_bbox in output:
            # draw the bounding box
            top, left, down, right = [int(x) for x in person_bbox[1:5]]
            if left >= right or top >= down:
                continue
            person_img = color_image[left:right, top:down].copy()
            img = Image.fromarray(person_img)
            bboxes, landmark = detect_faces(img)   # detect the faces first
            if len(bboxes) == 0:
                print('detect no face')
            else:
                print('detect face!!!!!')
                for bbox in bboxes:
                    cv2.rectangle(orig_im, (int(bbox[0] + top), int(bbox[1] + left)),
                                  (int(bbox[2] + top), int(bbox[3] + left)), (0, 0, 255))   # add the box
                    loc_x_y = [bbox[2] + top, bbox[1] + left]
                    face_img = person_img[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()   # crop the face region from the image
                    feature = np.squeeze(get_feature(face_img, model_facenet, trans, device))   # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]
                    # draw the box and name here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)   # add the name
        print('timetimetimetotal ', time.time() - time_a)

        ###############################################################################################################
        time_a = time.time()
        '''
        # show the final output result
        for output_kalman_normal in outputs_kalman_normal:
            if output_kalman_normal[4] == 1:
                cv2.putText(orig_im, person,
                            (output_kalman_normal[0].astype(np.int32) + 100, output_kalman_normal[1].astype(np.int32) + 20),
                            cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
        '''
        # out.write(orig_im)
        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        aux_time.append(time.time() - time_a)
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % (fps))

    avg_detect_time = np.mean(detect_time)
    avg_recogn_time = np.mean(recogn_time)
    avg_kalman_time = np.mean(kalman_time)
    avg_aux_time = np.mean(aux_time)
    print("avg detect: {}".format(avg_detect_time))
    print("avg recogn: {}".format(avg_recogn_time))
    print("avg kalman: {}".format(avg_kalman_time))
    print("avg aux: {}".format(avg_aux_time))
    print("avg fps: {}".format(
        1 / (avg_detect_time + avg_recogn_time + avg_kalman_time + avg_aux_time)))
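# `prep_image` comes from the YOLO helper code used in every loop above. A rough sketch
# of what it is assumed to do (illustrative only; the real helper may letterbox rather
# than plain-resize): resize the BGR frame to inp_dim x inp_dim, convert it to a
# normalized RGB CHW tensor, and return the tensor, the untouched original frame, and
# the original (width, height).
def prep_image_sketch(frame, inp_dim):
    orig_im = frame
    dim = orig_im.shape[1], orig_im.shape[0]             # (width, height)
    img = cv2.resize(orig_im, (inp_dim, inp_dim))
    img = img[:, :, ::-1].transpose((2, 0, 1)).copy()    # BGR -> RGB, HWC -> CHW
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    return img, orig_im, dim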
def main():
    ##########################################################################################################
    # preparation part
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0   # assert aborts the program when the condition is false
    assert inp_dim > 32
    if CUDA:
        model.cuda()
    model.eval()

    # Kalman Filter
    tracker = Tracker(dist_thresh=160, max_frames_to_skip=100,
                      max_trace_length=5, trackIdCount=1)

    global confirm
    global person
    fps = 0.0
    count = 0
    frame = 0
    person = []
    confirm = False
    reconfirm = False
    count = 0   # detect every 3 frames to update the person's position; otherwise keep the last position

    # record the video
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output/output_kalman111.avi', fourcc, 18.0, (640, 360), True)
    # cap = cv2.VideoCapture(0)

    while True:
        start = time.time()
        align_to = rs.stream.color
        align = rs.align(align_to)
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)
        depth_frame = aligned_frames.get_depth_frame()
        color_frame = aligned_frames.get_color_frame()
        # ret, color_image = cap.read()

        # filter that smooths the depth image
        spatial = rs.spatial_filter()
        spatial.set_option(rs.option.filter_magnitude, 5)
        spatial.set_option(rs.option.filter_smooth_alpha, 0.5)
        spatial.set_option(rs.option.filter_smooth_delta, 20)
        spatial.set_option(rs.option.holes_fill, 3)
        filtered_depth = spatial.process(depth_frame)

        # filter that fills holes
        hole_filling = rs.hole_filling_filter()
        hole_filling.set_option(rs.option.holes_fill, 2)
        filled_depth = hole_filling.process(filtered_depth)

        color_image = np.asanyarray(color_frame.get_data())
        depth_image = np.asanyarray(filled_depth.get_data())
        depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET)

        img, orig_im, dim = prep_image(color_image, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        ##################################################################################################
        # people detection part
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
        if count % 3 == 0:
            output = model(Variable(img), CUDA)   # feed the prepared image into the YOLO network to get detections
            output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)
            if type(output) == int:
                fps = (fps + (1. / (time.time() - start))) / 2
                print("fps= %f" % (fps))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim   # clamp the tensor to the valid range
            # im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= color_image.shape[1]
            output[:, [2, 4]] *= color_image.shape[0]
            output = output.cpu().numpy()
            output = sellect_person(output)   # drop detections whose label is not 'person' to reduce computation
            output = np.array(output)
            output_update = output
        elif count % 3 != 0:
            output = output_update
        count += 1
        # list(map(lambda x: write(x, orig_im), output))   # draw the detections on the original image
        # output[0, 1:5] holds the top-left and bottom-right corners of the box

        ###########################################################################################################
        # Kalman filter tracking part
        output_kalman_xywh = to_xy(output)   # convert output into the format expected by the Kalman filter update
        if len(output_kalman_xywh) > 0:
            tracker.Update(output_kalman_xywh)   # update the box positions with the Kalman filter
            outputs_kalman_normal = np.array(xy_to_normal(output, tracker.tracks))   # convert back to the original format
            # draw the boxes
            for output_kalman_normal in outputs_kalman_normal:
                cv2.rectangle(orig_im,
                              (int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                              (int(output_kalman_normal[2]), int(output_kalman_normal[3])),
                              (255, 255, 255), 2)
                cv2.rectangle(depth_colormap,
                              (int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                              (int(output_kalman_normal[2]), int(output_kalman_normal[3])),
                              (255, 255, 255), 2)
                cv2.putText(orig_im, str(output_kalman_normal[4]),
                            (int(output_kalman_normal[0]), int(output_kalman_normal[1])),
                            0, 5e-3 * 200, (0, 255, 0), 2)   # the track id is just a number
        # tracker.tracks[i].track_id

        ########################################################################################################
        # face recognition part
        if confirm == False:
            saved_model = './ArcFace/model/068.pth'
            name_list = os.listdir('./users')
            path_list = [os.path.join('./users', i, '%s.txt' % (i)) for i in name_list]
            total_features = np.empty((128,), np.float32)
            for i in path_list:
                temp = np.loadtxt(i)
                total_features = np.vstack((total_features, temp))
            total_features = total_features[1:]
            # threshold = 0.30896   # this threshold is not suitable, probably because of the gap between training and test data!!!
            threshold = 0.5
            model_facenet = mobileFaceNet()
            model_facenet.load_state_dict(torch.load(saved_model)['backbone_net_list'])
            model_facenet.eval()
            # use_cuda = torch.cuda.is_available() and True
            # device = torch.device("cuda" if use_cuda else "cpu")
            device = torch.device("cuda")   # is_cuda_available
            trans = transforms.Compose([
                transforms.Resize((112, 112)),
                transforms.ToTensor(),
                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            ])
            model_facenet.to(device)

            img = Image.fromarray(color_image)
            bboxes, landmark = detect_faces(img)   # detect the faces first
            if len(bboxes) == 0:
                print('detect no people')
            else:
                for bbox in bboxes:
                    loc_x_y = [bbox[2], bbox[1]]
                    person_img = color_image[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])].copy()   # crop the face region from the image
                    feature = np.squeeze(get_feature(person_img, model_facenet, trans, device))   # compute the feature of the cropped face
                    cos_distance = cosin_metric(total_features, feature)
                    index = np.argmax(cos_distance)
                    if cos_distance[index] <= threshold:
                        continue
                    person = name_list[index]
                    # draw the box and name here
                    orig_im = draw_ch_zn(orig_im, person, font, loc_x_y)   # add the name
                    cv2.rectangle(orig_im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 0, 255))   # add the box
            # cv2.imshow("frame", orig_im)

            ############################################################################################################
            # confirm part
            print('confirmation rate: {} %'.format(count * 10))
            cv2.putText(orig_im, 'confirmation rate: {} %'.format(count * 2.5), (10, 30),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
            if len(bboxes) != 0 and len(output) != 0:
                if bboxes[0, 0] > output[0, 1] and bboxes[0, 1] > output[0, 2] \
                        and bboxes[0, 2] < output[0, 3] and bboxes[0, 3] < output[0, 4] and person:
                    count += 1
            frame += 1
            if count >= 40 and frame <= 100:
                confirm = True
                print('confirmed that the face belongs to that person')
            elif frame >= 100:
                print('failed to confirm, starting again')
                reconfirm = True
                count = 0
                frame = 0
            if reconfirm == True:
                cv2.putText(orig_im, 'failed to confirm, starting again', (10, 60),
                            cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)

        ###############################################################################################################
        # show the final output result
        if not confirm:
            cv2.putText(orig_im, 'still not confirmed',
                        (output[0, 1].astype(np.int32) + 100, output[0, 2].astype(np.int32) + 20),
                        cv2.FONT_HERSHEY_PLAIN, 2, [0, 0, 255], 2)
        if confirm:
            for output_kalman_normal in outputs_kalman_normal:
                if output_kalman_normal[4] == 1:
                    cv2.putText(orig_im, person,
                                (output_kalman_normal[0].astype(np.int32) + 100, output_kalman_normal[1].astype(np.int32) + 20),
                                cv2.FONT_HERSHEY_PLAIN, 2, [0, 255, 0], 2)
                    dist_info = get_dist_info(depth_image, output_kalman_normal)   # depth (z) information
                    # orig_im = clip_rest(color_image, depth_image, dist_info)
                    # depth_colormap = add_dist_info(depth_colormap, bbox, dist_info)
                    orig_im = add_dist_info(orig_im, output_kalman_normal, dist_info)

        # images = np.hstack((orig_im, depth_colormap))
        cv2.imshow("result", orig_im)
        out.write(orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        fps = (fps + (1. / (time.time() - start))) / 2
        print("fps= %f" % (fps))
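# `sellect_person` (sic) is used in every detection loop above but not defined in this
# section. A minimal sketch of the assumed filtering (illustrative only): keep only rows
# whose predicted COCO class is 'person' (class index 0), assuming the class index is
# stored in the last column of each YOLO output row.
def sellect_person_sketch(output):
    return [row for row in output if int(row[-1]) == 0]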
from ArcFace.mobile_model import mobileFaceNet
from mtcnn.src import detect_faces, show_bboxes
import torch as t
from PIL import Image
import numpy as np
import cv2

saved_model = './ArcFace/model/068.pth'
threshold = 0.30896
model = mobileFaceNet()
model.load_state_dict(t.load(saved_model)['backbone_net_list'])
model.eval()
# is_cuda_available

cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print('failed to open camera!!!')
ret, frame = cap.read()
while ret:
    frame = frame[:, :, ::-1]  # BGR -> RGB
    img = Image.fromarray(frame)
    bboxes, landmark = detect_faces(img)
    show_img = show_bboxes(img, bboxes, landmark)
    show_img = np.array(show_img)[:, :, ::-1]  # RGB -> BGR for OpenCV display
    cv2.imshow('img', show_img)
    cv2.waitKey(30)
    ret, frame = cap.read()