classes_path = 'data/coco_classes.txt'
# model_path can be 'yolov4.h5', './weights/step00001000.h5', and so on.
# model_path = 'yolov4.h5'
model_path = './weights/step00001000.h5'

# A larger input_shape raises accuracy but lowers speed.
# input_shape = (320, 320)
input_shape = (416, 416)
# input_shape = (608, 608)

num_anchors = 3
all_classes = get_classes(classes_path)
num_classes = len(all_classes)

inputs = layers.Input(shape=(None, None, 3))
yolo = YOLOv4(inputs, num_classes, num_anchors)
yolo.load_weights(model_path, by_name=True)

_decode = Decode(0.05, 0.45, input_shape, yolo, all_classes)

# Detect images in the test folder.
for (root, dirs, files) in os.walk('images/test'):
    if files:
        start = time.time()
        for f in files:
            path = os.path.join(root, f)
            image = cv2.imread(path)
            image, boxes, scores, classes = _decode.detect_image(image, draw_image=True)
            cv2.imwrite('images/res/' + f, image)
        print('total time: {0:.6f}s'.format(time.time() - start))
from model.decode_np import Decode

if __name__ == '__main__':
    file = 'data/coco_classes.txt'
    model_path = 'yolo_bgr_mAP_46.h5'

    # A larger input_shape raises accuracy but lowers speed.
    # input_shape = (320, 320)
    input_shape = (416, 416)
    # input_shape = (608, 608)

    _decode = Decode(0.6, 0.5, input_shape, model_path, file)

    # Detect images in the test folder.
    for (root, dirs, files) in os.walk('images/test'):
        if files:
            start = time.time()
            for f in files:
                # print(f)
                path = os.path.join(root, f)
                image = cv2.imread(path)
                image = _decode.detect_image(image)
                cv2.imwrite('images/res/' + f, image)
            print('total time: {0:.6f}s'.format(time.time() - start))

    # Detect videos one at a time in the videos/test folder.
    # video = 'library1.mp4'
    # _decode.detect_video(video)
if cv2.waitKey(1) == ord('q'):
    capture.release()
    cv2.destroyWindow("screen_title2")


t1 = threading.Thread(target=job1)
# t2 = threading.Thread(target=job2)
t1.start()
# t2.start()

while True:
    if a > 0:
        bgr_image = frame1
        gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

        faces = face_detector.detect_image(bgr_image)[1]
        if faces is None:
            faces = ()
        for face_coordinates in faces:
            x1, y1, x2, y2 = face_coordinates
            face_coordinates = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
            x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
            gray_face = gray_image[y1:y2, x1:x2]
            try:
                gray_face = cv2.resize(gray_face, (emotion_target_size))
            except:
                continue
            gray_face = preprocess_input(gray_face, True)
            gray_face = np.expand_dims(gray_face, 0)
if torch.cuda.is_available():
    # If a GPU is available, keep the model (including its weights) in GPU memory.
    yolo = yolo.cuda()
yolo.load_state_dict(torch.load(model_path))
# model.eval() must be called before running inference so that dropout and batch
# normalization layers switch to evaluation mode; otherwise the results are inconsistent.
yolo.eval()

_decode = Decode(conf_thresh, nms_thresh, input_shape, yolo, all_classes)

if not os.path.exists('images/res/'):
    os.mkdir('images/res/')
path_dir = os.listdir('images/test')

# warm up
if use_gpu:
    for k, filename in enumerate(path_dir):
        image = cv2.imread('images/test/' + filename)
        image, boxes, scores, classes = _decode.detect_image(image, draw_image=False)
        if k == 10:
            break

time_stat = deque(maxlen=20)
start_time = time.time()
end_time = time.time()
num_imgs = len(path_dir)
start = time.time()
for k, filename in enumerate(path_dir):
    image = cv2.imread('images/test/' + filename)
    image, boxes, scores, classes = _decode.detect_image(image, draw_image)

    # Estimate the remaining time.
    start_time = end_time
    end_time = time.time()
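    # --- Illustrative only (not the original code): the snippet stops before the
    # --- remaining-time estimate is actually computed. A minimal sketch, assuming
    # --- time_stat collects per-image latencies inside this same loop:
    time_stat.append(end_time - start_time)           # latency of the current image
    time_cost = sum(time_stat) / len(time_stat)       # smoothed over the last 20 images
    eta_sec = (num_imgs - k - 1) * time_cost          # images left * seconds per image
    print('ETA: %.1fs, %.2f FPS' % (eta_sec, 1.0 / time_cost))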
place = fluid.CUDAPlace(gpu_id) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
fluid.load(eval_prog, model_path, executor=exe)

_decode = Decode(conf_thresh, nms_thresh, input_shape, exe, eval_prog, all_classes)

if not os.path.exists('images/res/'):
    os.mkdir('images/res/')
path_dir = os.listdir('images/test')

# warm up
if use_gpu:
    for k, filename in enumerate(path_dir):
        image = cv2.imread('images/test/' + filename)
        image, boxes, scores, classes = _decode.detect_image(image, eval_fetch_list, draw_image=False)
        if k == 10:
            break

time_stat = deque(maxlen=20)
start_time = time.time()
end_time = time.time()
num_imgs = len(path_dir)
start = time.time()
for k, filename in enumerate(path_dir):
    image = cv2.imread('images/test/' + filename)
    image, boxes, scores, classes = _decode.detect_image(image, eval_fetch_list, draw_image)

    # Estimate the remaining time.
    start_time = end_time
class face_detect():
    def __init__(self):
        # hyper-parameters for bounding boxes shape
        self.frame_window = 10
        self.emotion_offsets = (20, 40)

        # loading models
        self.face_detector = Decode('data/voc_classes.txt', './weights/best_model.h5')
        self.emotion_classifier = load_model('model/fer2013_mini_XCEPTION.102-0.66.hdf5', compile=False)
        self.emotion_labels = get_labels('fer2013')

        # getting input model shapes for inference
        self.emotion_target_size = self.emotion_classifier.input_shape[1:3]

        # starting lists for calculating modes
        self.emotion_window = []

    def faceDetect(self, bgr_image):
        faces = self.face_detector.detect_image(bgr_image)[1]
        probability = [0, 0, 0, 0, 0, 0, 0]
        if faces is not None and len(faces) == 1:
            bgr_image, probability = self.emo_Detect(bgr_image, faces[0])
        return bgr_image, probability

    def emo_Detect(self, bgr_image, face_coordinates):
        gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        x1, y1, x2, y2 = face_coordinates
        face_coordinates = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
        x1, x2, y1, y2 = apply_offsets(face_coordinates, self.emotion_offsets)
        gray_face = gray_image[y1:y2, x1:x2]
        gray_face = cv2.resize(gray_face, (self.emotion_target_size))
        gray_face = preprocess_input(gray_face, True)
        gray_face = np.expand_dims(gray_face, 0)
        gray_face = np.expand_dims(gray_face, -1)

        emotion_prediction = self.emotion_classifier.predict(gray_face)
        for idx, probability in enumerate(emotion_prediction[0]):
            print(self.emotion_labels[idx], probability)
        emotion_probability = np.max(emotion_prediction)
        emotion_label_arg = np.argmax(emotion_prediction)
        emotion_text = self.emotion_labels[emotion_label_arg]
        self.emotion_window.append(emotion_text)

        if len(self.emotion_window) > self.frame_window:
            self.emotion_window.pop(0)
        try:
            emotion_mode = mode(self.emotion_window)
            print("mode is " + emotion_mode)
        except:
            pass

        if emotion_text == 'angry':
            color = emotion_probability * np.asarray((0, 0, 255))
        elif emotion_text == 'sad':
            color = emotion_probability * np.asarray((255, 0, 0))
        elif emotion_text == 'happy':
            color = emotion_probability * np.asarray((0, 255, 255))
        elif emotion_text == 'surprise':
            color = emotion_probability * np.asarray((255, 255, 0))
        else:
            color = emotion_probability * np.asarray((0, 255, 0))

        color = color.astype(int)
        color = color.tolist()
        draw_bounding_box(face_coordinates, bgr_image, color)
        draw_text(face_coordinates, bgr_image, emotion_text, color, 0, -45, 1, 1)
        return bgr_image, emotion_prediction[0]
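# A minimal usage sketch (not part of the original file) for the face_detect class
# above; 'selfie.jpg' and 'selfie_out.jpg' are hypothetical paths.
if __name__ == '__main__':
    detector = face_detect()
    bgr = cv2.imread('selfie.jpg')                # any BGR image containing at most one face
    bgr, probability = detector.faceDetect(bgr)   # draws the box and emotion label in place
    print(probability)                            # 7 emotion probabilities in fer2013 label order
    cv2.imwrite('selfie_out.jpg', bgr)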
key_list = list(test_dic.keys())
key_len = len(key_list)
while key_len == 0:
    time.sleep(0.01)
    key_list = list(test_dic.keys())
    key_len = len(key_list)
dic = test_dic['%.8d' % 0]
image = dic['image']
pimage = dic['pimage']
im_info = dic['im_info']

# warm up
if use_gpu:
    for k in range(10):
        image, boxes, scores, classes = _decode.detect_image(image, pimage, im_info, draw_image=False)

time_stat = deque(maxlen=20)
start_time = time.time()
end_time = time.time()
num_imgs = len(path_dir)
start = time.time()
for k, filename in enumerate(path_dir):
    key_list = list(test_dic.keys())
    key_len = len(key_list)
    while key_len == 0:
        time.sleep(0.01)
        key_list = list(test_dic.keys())
        key_len = len(key_list)
    dic = test_dic.pop('%.8d' % k)
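# --- Illustrative only: the loop above is the consumer side of test_dic. A hypothetical
# --- producer, run in a separate thread/process, would preprocess each image and store it
# --- under the same zero-padded key before it is popped here:
def read_worker(path_dir, test_dic, _decode):
    for k, filename in enumerate(path_dir):
        image = cv2.imread('images/test/' + filename)
        pimage, im_info = _decode.process_image(np.copy(image))  # assumed preprocessing helper
        test_dic['%.8d' % k] = {'image': image, 'pimage': pimage, 'im_info': im_info}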
def job1():
    global count1
    global frame

    use_gpu = True
    # GPU memory allocation.
    if use_gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    set_session(tf.Session(config=config))

    facesDetect = {'classes_path': 'data/voc_classes.txt',
                   'model_path': './weights/best_model.h5'}
    emotion_model_path = 'model/fer2013_mini_XCEPTION.102-0.66.hdf5'
    emotion_labels = get_labels('fer2013')

    # hyper-parameters for bounding boxes shape
    frame_window = 10
    emotion_offsets = (20, 40)

    # loading models
    face_detector = Decode(facesDetect['classes_path'], facesDetect['model_path'])
    emotion_classifier = load_model(emotion_model_path, compile=False)

    # getting input model shapes for inference
    emotion_target_size = emotion_classifier.input_shape[1:3]

    emotion_window1 = []
    record1 = {'angry': [0], 'disgust': [0], 'fear': [0], 'happy': [0],
               'sad': [0], 'surprise': [0], 'neutral': [0]}
    # record_diff = {'angry': [], 'disgust': [], 'fear': [], 'happy': [], 'sad': [], 'surprise': [], 'neutral': []}
    emo_record1 = []

    capture = cv2.VideoCapture(0)
    while True:
        if True:
            bgr_image = capture.read()[1]
            gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
            rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

            faces = face_detector.detect_image(bgr_image)[1]
            if faces is None:
                faces = ()
            # print(faces)
            for face_coordinates in faces:
                x1, y1, x2, y2 = face_coordinates
                face_coordinates = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
                x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
                gray_face = gray_image[y1:y2, x1:x2]
                try:
                    gray_face = cv2.resize(gray_face, (emotion_target_size))
                except:
                    continue

                gray_face = preprocess_input(gray_face, True)
                gray_face = np.expand_dims(gray_face, 0)
                gray_face = np.expand_dims(gray_face, -1)
                emotion_prediction = emotion_classifier.predict(gray_face)

                ############### Moving-average formula ################
                '''for idx, probability in enumerate(emotion_prediction[0]):
                    alpha = 0.5
                    record[emotion_labels[idx]].append(record[emotion_labels[idx]][-1]
                        + alpha * (round(probability * 100, 2) - record[emotion_labels[idx]][-1]))
                    emotion_prediction[0][idx] = record[emotion_labels[idx]][-1]
                    if len(record[emotion_labels[idx]]) > 10:
                        record[emotion_labels[idx]].pop(0)
                    # print(record)
                    # print()'''
                ########################################################

                ################# Custom weighting #####################
                emotion_prediction[0] = weights_change(emo_record1, emotion_prediction[0])
                data = []
                for idx, probability in enumerate(emotion_prediction[0]):
                    data.append((emotion_labels[idx], probability))
                rd1.append(data)
                count1 += 1
                emo_record1.append(np.argmax(emotion_prediction))
                if len(emo_record1) > 10:
                    emo_record1.pop(0)
                ########################################################

                emotion_probability = np.max(emotion_prediction)
                emotion_label_arg = np.argmax(emotion_prediction)
                emotion_text = emotion_labels[emotion_label_arg]
                emotion_window1.append(emotion_text)

                if len(emotion_window1) > frame_window:
                    emotion_window1.pop(0)
                try:
                    emotion_mode1 = mode(emotion_window1)
                except:
                    continue

                if emotion_text == 'angry':
                    color = emotion_probability * np.asarray((255, 0, 0))
                elif emotion_text == 'sad':
                    color = emotion_probability * np.asarray((0, 0, 255))
                elif emotion_text == 'happy':
                    color = emotion_probability * np.asarray((255, 255, 0))
                elif emotion_text == 'surprise':
                    color = emotion_probability * np.asarray((0, 255, 255))
                else:
                    color = emotion_probability * np.asarray((0, 255, 0))

                color = color.astype(int)
                color = color.tolist()
                draw_bounding_box(face_coordinates, rgb_image, color)
                draw_text(face_coordinates, rgb_image, emotion_mode1, color, 0, -45, 1, 1)

            bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
            cv2.imshow('window_frame1', bgr_image)
            if cv2.waitKey(1) & 0xFF == 27:
                break
    cv2.destroyAllWindows()
if not os.path.exists('images/res/'):
    os.mkdir('images/res/')
path_dir = os.listdir('images/test')

capture = cv2.VideoCapture(video_path)
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video_name = os.path.split(video_path)[-1]
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
out_path = os.path.join(output_dir, video_name)
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

index = 1
start = time.time()
while (1):
    ret, frame = capture.read()
    if not ret:
        break
    print('detect frame:%d' % (index))
    index += 1
    pimage, im_size = _decode.process_image(np.copy(frame))
    image, boxes, scores, classes = _decode.detect_image(frame, pimage, im_size, draw_image, draw_thresh)
    cv2.imshow("detection", frame)
    writer.write(frame)
    if cv2.waitKey(110) & 0xff == 27:
        break
writer.release()