def get_emo_model():
    """Build the emotion classifier, load its checkpoint, and return it.

    The network is created with 3 input channels and 7 output classes. It is
    moved to GPU 0 when the module-level ``is_cuda`` flag is truthy. The
    checkpoint at ``local_checkpoint_path`` is deserialized onto the CPU and
    its ``"net"`` entry is loaded into the network.

    Returns:
        The network, switched to evaluation mode.
    """
    network = resmasking_dropout1(in_channels=3, num_classes=7)
    if is_cuda:
        network.cuda(0)

    checkpoint = torch.load(local_checkpoint_path, map_location="cpu")
    network.load_state_dict(checkpoint["net"])
    network.eval()
    return network
def main(image_path): # load configs and set random seed configs = json.load(open('./configs/fer2013_config.json')) image_size = (configs['image_size'], configs['image_size']) # model = densenet121(in_channels=3, num_classes=7) model = resmasking_dropout1(in_channels=3, num_classes=7) model.cuda() state = torch.load('./saved/checkpoints/resmasking_dropout1_rot30_2019Nov17_14.33') model.load_state_dict(state['net']) model.eval() image = cv2.imread(image_path) faces = face_cascade.detectMultiScale(image, 1.15, 5) gray = ensure_gray(image) for x, y, w, h in faces: cv2.rectangle(image, (x, y), (x + w, y + h), (179, 255, 179), 2) face = gray[y:y + h, x:x + w] face = ensure_color(face) face = cv2.resize(face, image_size) face = transform(face).cuda() face = torch.unsqueeze(face, dim=0) output = torch.squeeze(model(face), 0) proba = torch.softmax(output, 0) emo_proba, emo_idx = torch.max(proba, dim=0) emo_idx = emo_idx.item() emo_proba = emo_proba.item() emo_label = FER_2013_EMO_DICT[emo_idx] label_size, base_line = cv2.getTextSize('{}: 000'.format(emo_label), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2) cv2.rectangle( image, (x + w, y + 1 - label_size[1]), (x + w + label_size[0], y + 1 + base_line), (223, 128, 255), cv2.FILLED ) cv2.putText( image, '{}: {}'.format(emo_label, int(emo_proba * 100)), (x + w, y + 1), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2 ) from barez import show show(image) '''
def load_model():
    """Create the classifier on the GPU, load its checkpoint, and return it.

    Returns:
        The network in evaluation mode, with the checkpoint's ``"net"``
        entry loaded.
    """
    # Alternative checkpoint:
    # "./saved/checkpoints/resmasking_dropout1_rot30_2019Nov17_14.33"
    checkpoint_path = "./saved/checkpoints/Z_resmasking_dropout1_rot30_2019Nov30_13.32"

    network = resmasking_dropout1(in_channels=3, num_classes=7)
    network.cuda()

    checkpoint = torch.load(checkpoint_path)
    network.load_state_dict(checkpoint["net"])
    network.eval()
    return network
def main():
    """Scan every video under ``video_path`` for "happy" events.

    For each video that opens, roughly eight frames per second are sampled
    (every ``rate // 8``-th frame). Each sampled frame is mirrored, faces are
    found with the module-level ``net`` detector (confidence >= 0.7), and
    each face is classified. The per-frame score is the highest class-3
    ("happy") probability over all faces. After five sampled frames scoring
    above 0.8, an event is appended to the result list: a copy of
    ``JSON_templete`` with ``videoId``, a two-second ``startTime``/``endTime``
    window, and ``observation.labelConfidence`` set to the mean of the five
    scores. Annotated frames are written under ``result_path + video_name``
    and the events are dumped to ``output_json``.
    """
    import copy  # function-scope import: only needed for the template copy

    # Close the config file deterministically instead of leaking the handle.
    with open("./configs/fer2013_config.json") as config_file:
        configs = json.load(config_file)
    image_size = (configs["image_size"], configs["image_size"])

    model = resmasking_dropout1(in_channels=3, num_classes=7)
    model.cuda()
    state = torch.load("./saved/checkpoints/resmasking_dropout1_00")
    model.load_state_dict(state["net"])
    model.eval()

    video_list = os.listdir(video_path)
    print(video_list)
    startTime = time.time()
    result = []
    for video_name in video_list:
        print("The video is " + video_name)
        video_item_path = video_path + video_name
        vid = cv2.VideoCapture(video_item_path)
        try:
            if not vid.isOpened():
                continue

            rate = round(vid.get(cv2.CAP_PROP_FPS))
            print(type(rate))
            FrameNumber = vid.get(cv2.CAP_PROP_FRAME_COUNT)
            if rate <= 0:
                # A zero frame rate would divide by zero below.
                print("Skipping %s: invalid frame rate %d" % (video_name, rate))
                continue
            duration = FrameNumber / rate
            # Never let the sampling step reach 0 (videos slower than 8 fps),
            # otherwise ``cnt % step`` raises ZeroDivisionError.
            step = max(rate // 8, 1)
            print("The duration is %f s" % duration)
            print("The number of frame is %d " % FrameNumber)
            print("The rate of video is %d " % rate)

            video_name_sub = video_name.split('.')[0]
            # cv2.imwrite fails silently when the target directory is missing.
            frame_dir = result_path + video_name
            os.makedirs(frame_dir, exist_ok=True)

            subStartTime = time.time()
            with torch.no_grad():
                cnt = 0
                happy_time = 0
                happy_Confidence = []
                while True:
                    ret, frame = vid.read()
                    if frame is None or ret is not True:
                        break
                    if cnt % step != 0:
                        cnt += 1
                        continue

                    frame = np.fliplr(frame).astype(np.uint8)
                    h, w = frame.shape[:2]
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    blob = cv2.dnn.blobFromImage(
                        cv2.resize(frame, (300, 300)),
                        1.0,
                        (300, 300),
                        (104.0, 177.0, 123.0),
                    )
                    net.setInput(blob)
                    faces = net.forward()

                    happy_proba = 0
                    for i in range(0, faces.shape[2]):
                        confidence = faces[0, 0, i, 2]
                        if confidence < 0.7:
                            continue

                        box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
                        start_x, start_y, end_x, end_y = box.astype("int")

                        # Convert the box to a square, enlarged by 10%.
                        center_x = (start_x + end_x) // 2
                        center_y = (start_y + end_y) // 2
                        square_length = ((end_x - start_x) + (end_y - start_y)) // 2 // 2
                        square_length *= 1.1
                        start_x = int(center_x - square_length)
                        start_y = int(center_y - square_length)
                        end_x = int(center_x + square_length)
                        end_y = int(center_y + square_length)
                        cv2.rectangle(
                            frame, (start_x, start_y), (end_x, end_y), (179, 255, 179), 2
                        )

                        # Clamp at 0: a negative slice index would count from
                        # the far edge of the array instead of the near one.
                        face = gray[
                            max(start_y, 0):max(end_y, 0),
                            max(start_x, 0):max(end_x, 0),
                        ]
                        if face.shape[0] == 0 or face.shape[1] == 0:
                            continue
                        face = ensure_color(face)
                        face = cv2.resize(face, image_size)
                        face = transform(face).cuda()
                        face = torch.unsqueeze(face, dim=0)

                        output = torch.squeeze(model(face), 0)
                        proba = torch.softmax(output, 0)
                        proba = proba.cpu().numpy()

                        # Only class 3 is tracked here. Convert to a built-in
                        # float so the value stays JSON-serializable.
                        emo_proba, emo_idx = float(proba[3]), 3
                        happy_proba = max(happy_proba, emo_proba)
                        emo_label = FER_2013_EMO_DICT[emo_idx]

                        label_size, base_line = cv2.getTextSize(
                            "{}: 000".format(emo_label), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2
                        )
                        cv2.rectangle(
                            frame,
                            (end_x, start_y + 1 - label_size[1]),
                            (end_x + label_size[0], start_y + 1 + base_line),
                            (223, 128, 255),
                            cv2.FILLED,
                        )
                        cv2.putText(
                            frame,
                            "{} {}".format(emo_label, int(emo_proba * 100)),
                            (end_x, start_y + 1),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.8,
                            (0, 0, 0),
                            2,
                        )

                    if happy_proba > 0.8:
                        happy_time += 1
                        happy_Confidence.append(happy_proba)
                        if happy_time > 4:
                            happy_time = 0
                            # Deep copy: a shallow copy would share the nested
                            # "observation" dict between all events, so every
                            # event would report the last labelConfidence.
                            tmp_json = copy.deepcopy(JSON_templete)
                            tmp_json["videoId"] = video_name_sub
                            # The event window is two seconds long and never
                            # starts before the beginning of the video.
                            tmp_json["endTime"] = max(cnt / rate, 2.0)
                            tmp_json["startTime"] = tmp_json["endTime"] - 2
                            happy_Confidence.sort(reverse=True)
                            tmp_json["observation"]["labelConfidence"] = (
                                sum(happy_Confidence[:5]) / 5
                            )
                            result.append(tmp_json)
                            happy_Confidence = []
                    elif len(happy_Confidence) == 8:
                        # NOTE(review): the list is cleared at five entries
                        # above, so this branch looks unreachable - confirm.
                        happy_Confidence = []

                    cv2.imwrite(frame_dir + '/' + str(cnt).zfill(5) + '.jpg', frame)
                    cnt += 1

            subEndTime = time.time()
            print("Spending time of %s is %s s" % (video_name, subEndTime - subStartTime))
        finally:
            # Release the decoder even when the video is skipped or fails.
            vid.release()

    endTime = time.time()
    print("Total spending time is %s s" % (endTime - startTime))
    with open(output_json, 'w') as f:
        json.dump(result, f)
def main():
    """Run live emotion recognition on camera 0 until 'q' is pressed.

    Each frame is mirrored, faces are found with the module-level ``net``
    detector (confidence >= 0.5), and each face is classified. The face box
    and a "<label> <percent>" tag are drawn onto the frame, which is shown
    in the 'disp' window.

    Raises:
        RuntimeError: If camera 0 cannot be opened.
    """
    # Close the config file deterministically instead of leaking the handle.
    with open('./configs/fer2013_config.json') as config_file:
        configs = json.load(config_file)
    image_size = (configs['image_size'], configs['image_size'])

    model = resmasking_dropout1(in_channels=3, num_classes=7)
    model.cuda()
    state = torch.load(
        './saved/checkpoints/Z_resmasking_dropout1_rot30_2019Nov30_13.32')
    model.load_state_dict(state['net'])
    model.eval()

    vid = cv2.VideoCapture(0)
    try:
        if not vid.isOpened():
            # Without this check the read loop below would spin forever.
            raise RuntimeError('Could not open camera 0')

        with torch.no_grad():
            while True:
                ret, frame = vid.read()
                if frame is None or ret is not True:
                    continue

                try:
                    frame = np.fliplr(frame).astype(np.uint8)
                    h, w = frame.shape[:2]
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    blob = cv2.dnn.blobFromImage(
                        cv2.resize(frame, (300, 300)), 1.0, (300, 300),
                        (104.0, 177.0, 123.0))
                    net.setInput(blob)
                    faces = net.forward()

                    for i in range(0, faces.shape[2]):
                        confidence = faces[0, 0, i, 2]
                        if confidence < 0.5:
                            continue

                        box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
                        start_x, start_y, end_x, end_y = box.astype("int")

                        # Convert the box to a square, enlarged by 10%.
                        center_x = (start_x + end_x) // 2
                        center_y = (start_y + end_y) // 2
                        square_length = ((end_x - start_x) + (end_y - start_y)) // 2 // 2
                        square_length *= 1.1
                        start_x = int(center_x - square_length)
                        start_y = int(center_y - square_length)
                        end_x = int(center_x + square_length)
                        end_y = int(center_y + square_length)
                        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y),
                                      (179, 255, 179), 2)

                        # Clamp at 0: a negative slice index would count from
                        # the far edge of the array instead of the near one.
                        face = gray[max(start_y, 0):max(end_y, 0),
                                    max(start_x, 0):max(end_x, 0)]
                        if face.shape[0] == 0 or face.shape[1] == 0:
                            continue
                        face = ensure_color(face)
                        face = cv2.resize(face, image_size)
                        face = transform(face).cuda()
                        face = torch.unsqueeze(face, dim=0)

                        output = torch.squeeze(model(face), 0)
                        proba = torch.softmax(output, 0)
                        emo_proba, emo_idx = torch.max(proba, dim=0)
                        emo_idx = emo_idx.item()
                        emo_proba = emo_proba.item()
                        emo_label = FER_2013_EMO_DICT[emo_idx]

                        label_size, base_line = cv2.getTextSize(
                            '{}: 000'.format(emo_label),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
                        cv2.rectangle(
                            frame, (end_x, start_y + 1 - label_size[1]),
                            (end_x + label_size[0], start_y + 1 + base_line),
                            (223, 128, 255), cv2.FILLED)
                        cv2.putText(
                            frame,
                            '{} {}'.format(emo_label, int(emo_proba * 100)),
                            (end_x, start_y + 1), cv2.FONT_HERSHEY_SIMPLEX,
                            0.8, (0, 0, 0), 2)

                    cv2.imshow('disp', frame)
                    if cv2.waitKey(1) == ord('q'):
                        break
                except Exception:
                    # Best effort per frame: drop a frame that fails to
                    # process. Unlike a bare ``except``, this lets
                    # KeyboardInterrupt and SystemExit stop the loop.
                    continue
    finally:
        # Free the camera and close the window on every exit path.
        vid.release()
        cv2.destroyAllWindows()
def main(video_path):
    """Step through a video interactively, annotating faces with emotions.

    Each displayed frame waits for a key press:
    'w' saves the annotated frame under ``./real_life_demo/``;
    'q' quits;
    'f', 'g' and 'h' skip the next 10, 100 and 1000 frames respectively;
    any other key advances to the next frame.
    Playback stops when the video has no more frames.

    Args:
        video_path: Path (or other source accepted by ``cv2.VideoCapture``)
            of the video to browse.
    """
    # Close the config file deterministically instead of leaking the handle.
    with open('./configs/fer2013_config.json') as config_file:
        configs = json.load(config_file)
    image_size = (configs['image_size'], configs['image_size'])

    model = resmasking_dropout1(in_channels=3, num_classes=7)
    model.cuda()
    state = torch.load(
        './saved/checkpoints/resmasking_dropout1_rot30_2019Nov17_14.33')
    model.load_state_dict(state['net'])
    model.eval()

    video = cv2.VideoCapture(video_path)
    cv2.namedWindow('disp', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('disp', width=1500, height=800)

    cnt = 0
    passed_frame = 0
    try:
        with torch.no_grad():
            while True:
                cnt += 1
                ret, image = video.read()
                if image is None or ret is not True:
                    # End of stream (or unreadable source). Continuing here
                    # would loop forever because every later read fails too.
                    break
                if passed_frame != 0:
                    # Still fast-forwarding: drop this frame undisplayed.
                    passed_frame = max(passed_frame - 1, 0)
                    continue

                image = image.astype(np.uint8)
                faces = face_cascade.detectMultiScale(image, 1.15, 5)
                gray = ensure_gray(image)

                for x, y, w, h in faces:
                    cv2.rectangle(image, (x, y), (x + w, y + h),
                                  (179, 255, 179), 2)

                    face = gray[y:y + h, x:x + w]
                    face = ensure_color(face)
                    face = cv2.resize(face, image_size)
                    face = transform(face).cuda()
                    face = torch.unsqueeze(face, dim=0)

                    output = torch.squeeze(model(face), 0)
                    proba = torch.softmax(output, 0)
                    emo_proba, emo_idx = torch.max(proba, dim=0)
                    emo_idx = emo_idx.item()
                    emo_proba = emo_proba.item()
                    emo_label = FER_2013_EMO_DICT[emo_idx]

                    label_size, base_line = cv2.getTextSize(
                        '{}: 000'.format(emo_label),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
                    cv2.rectangle(image, (x + w, y + 1 - label_size[1]),
                                  (x + w + label_size[0], y + 1 + base_line),
                                  (223, 128, 255), cv2.FILLED)
                    cv2.putText(image,
                                '{}: {}'.format(emo_label, int(emo_proba * 100)),
                                (x + w, y + 1), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (0, 0, 0), 2)

                cv2.imshow('disp', image.astype(np.uint8))
                key = cv2.waitKey(0)
                if key == ord('w'):
                    cv2.imwrite('./real_life_demo/matbiec_{}.png'.format(cnt), image)
                elif key == ord('q'):
                    break
                elif key == ord('f'):
                    passed_frame = 10
                elif key == ord('g'):
                    passed_frame = 100
                elif key == ord('h'):
                    passed_frame = 1000
    finally:
        # Free the decoder and close the window on every exit path.
        video.release()
        cv2.destroyAllWindows()
def main():
    """Run live emotion recognition on camera 0 until 'q' is pressed.

    Each frame is mirrored and converted to grayscale. Faces are found with
    the module-level ``face_cascade`` and each one is classified. The face
    box and a "<label> <percent>" tag are drawn onto the frame, which is
    shown in the 'disp' window.

    Raises:
        RuntimeError: If camera 0 cannot be opened.
    """
    # Close the config file deterministically instead of leaking the handle.
    with open('./configs/fer2013_config.json') as config_file:
        configs = json.load(config_file)
    image_size = (configs['image_size'], configs['image_size'])

    model = resmasking_dropout1(in_channels=3, num_classes=7)
    model.cuda()
    state = torch.load(
        './saved/checkpoints/resmasking_dropout1_rot30_2019Nov17_14.33')
    model.load_state_dict(state['net'])
    model.eval()

    vid = cv2.VideoCapture(0)
    try:
        if not vid.isOpened():
            # Without this check the read loop below would spin forever.
            raise RuntimeError('Could not open camera 0')

        with torch.no_grad():
            while True:
                ret, frame = vid.read()
                if frame is None or ret is not True:
                    continue

                frame = np.fliplr(frame).astype(np.uint8)
                gray = ensure_gray(frame)
                faces = face_cascade.detectMultiScale(gray, 1.3, 5)

                for x, y, w, h in faces:
                    cv2.rectangle(frame, (x, y), (x + w, y + h),
                                  (179, 255, 179), 2)

                    face = gray[y:y + h, x:x + w]
                    face = ensure_color(face)
                    face = cv2.resize(face, image_size)
                    face = transform(face).cuda()
                    face = torch.unsqueeze(face, dim=0)

                    output = torch.squeeze(model(face), 0)
                    proba = torch.softmax(output, 0)
                    emo_proba, emo_idx = torch.max(proba, dim=0)
                    emo_idx = emo_idx.item()
                    emo_proba = emo_proba.item()
                    emo_label = FER_2013_EMO_DICT[emo_idx]

                    # Measure the text that is actually drawn. Measuring a
                    # "<label> 0" placeholder made the background too narrow
                    # for two- and three-digit percentages.
                    label_text = '{} {}'.format(emo_label, int(emo_proba * 100))
                    label_size, base_line = cv2.getTextSize(
                        label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
                    cv2.rectangle(frame, (x + w, y + 1 - label_size[1]),
                                  (x + w + label_size[0], y + 1 + base_line),
                                  (223, 128, 255), cv2.FILLED)
                    cv2.putText(frame, label_text, (x + w, y + 1),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)

                cv2.imshow('disp', frame)
                if cv2.waitKey(1) == ord('q'):
                    break
    finally:
        # Free the camera and close the window on every exit path.
        vid.release()
        cv2.destroyAllWindows()
tensor = torch.squeeze(tensor, 0) tensor = torch.mean(tensor, 0) tensor = tensor.detach().cpu().numpy() tensor = np.maximum(tensor, 0) tensor = cv2.resize(tensor, (224, 224)) tensor = tensor - np.min(tensor) tensor = tensor / np.max(tensor) # print(np.unique(tensor)) heatmap = cv2.applyColorMap(np.uint8(255 * tensor), cv2.COLORMAP_JET) return heatmap # return tensor model = resmasking_dropout1(3, 7) state = torch.load( "./saved/checkpoints/resmasking_naive_dropout1__sigmoid_2019Dec17_14.40") model.load_state_dict(state["net"]) model.cuda() model.eval() # for image_path in natsorted(glob.glob('/home/z/research/bkemo/images/**/*.png', recursive=True)): for image_path in natsorted( glob.glob("/home/z/research/bkemo/debug/**/*.png", recursive=True)): image_name = os.path.basename(image_path) if not os.path.exists("./landmark_false/{}".format(image_name)): continue print(image_name)
3: "happy", 4: "sad", 5: "surprise", 6: "neutral", } # load configs and set random seed package_root_dir = os.path.dirname(__file__) config_path = os.path.join(package_root_dir, "configs/fer2013_config.json") with open(config_path) as ref: configs = json.load(ref) image_size = (configs["image_size"], configs["image_size"]) model = resmasking_dropout1(in_channels=3, num_classes=7) model.cuda() state = torch.load(local_checkpoint_path) model.load_state_dict(state["net"]) model.eval() def video_demo(): vid = cv2.VideoCapture(0) with torch.no_grad(): while True: ret, frame = vid.read() if frame is None or ret is not True: continue
def main():
    """Annotate every frame of every video under ``video_path``.

    For each video that opens, every frame is mirrored, faces are found with
    the module-level ``net`` detector (confidence >= 0.5), and each face is
    classified. The face box and a "<label> <percent>" tag are drawn onto the
    frame, which is written as ``<result_path><video_name>/<index>.jpg`` with
    a five-digit zero-padded frame index. Timing and basic video statistics
    are printed per video.
    """
    # Close the config file deterministically instead of leaking the handle.
    with open("./configs/fer2013_config.json") as config_file:
        configs = json.load(config_file)
    image_size = (configs["image_size"], configs["image_size"])

    model = resmasking_dropout1(in_channels=3, num_classes=7)
    model.cuda()
    state = torch.load(
        "./saved/checkpoints/Z_resmasking_dropout1_rot30_2019Nov30_13.32")
    model.load_state_dict(state["net"])
    model.eval()

    video_list = os.listdir(video_path)
    print(video_list)
    for video_name in video_list:
        print("The video is " + video_name)
        video_item_path = video_path + video_name
        vid = cv2.VideoCapture(video_item_path)
        try:
            if not vid.isOpened():
                continue

            rate = round(vid.get(cv2.CAP_PROP_FPS))
            print(type(rate))
            FrameNumber = vid.get(cv2.CAP_PROP_FRAME_COUNT)
            # Some containers report 0 fps; avoid dividing by zero.
            duration = FrameNumber / rate if rate > 0 else 0.0
            print("The duration is %f s" % duration)
            print("The number of frame is %d " % FrameNumber)
            print("The rate of video is %d " % rate)

            # cv2.imwrite fails silently when the target directory is missing.
            frame_dir = result_path + video_name
            os.makedirs(frame_dir, exist_ok=True)

            start_time = time.time()
            with torch.no_grad():
                cnt = 0
                while True:
                    ret, frame = vid.read()
                    if frame is None or ret is not True:
                        break

                    frame = np.fliplr(frame).astype(np.uint8)
                    h, w = frame.shape[:2]
                    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                    blob = cv2.dnn.blobFromImage(
                        cv2.resize(frame, (300, 300)),
                        1.0,
                        (300, 300),
                        (104.0, 177.0, 123.0),
                    )
                    net.setInput(blob)
                    faces = net.forward()

                    for i in range(0, faces.shape[2]):
                        confidence = faces[0, 0, i, 2]
                        if confidence < 0.5:
                            continue

                        box = faces[0, 0, i, 3:7] * np.array([w, h, w, h])
                        start_x, start_y, end_x, end_y = box.astype("int")

                        # Convert the box to a square, enlarged by 10%.
                        center_x = (start_x + end_x) // 2
                        center_y = (start_y + end_y) // 2
                        square_length = ((end_x - start_x) + (end_y - start_y)) // 2 // 2
                        square_length *= 1.1
                        start_x = int(center_x - square_length)
                        start_y = int(center_y - square_length)
                        end_x = int(center_x + square_length)
                        end_y = int(center_y + square_length)
                        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y),
                                      (179, 255, 179), 2)

                        # Clamp at 0: a negative slice index would count from
                        # the far edge of the array instead of the near one.
                        face = gray[max(start_y, 0):max(end_y, 0),
                                    max(start_x, 0):max(end_x, 0)]
                        if face.shape[0] == 0 or face.shape[1] == 0:
                            continue
                        face = ensure_color(face)
                        face = cv2.resize(face, image_size)
                        face = transform(face).cuda()
                        face = torch.unsqueeze(face, dim=0)

                        output = torch.squeeze(model(face), 0)
                        proba = torch.softmax(output, 0)
                        emo_proba, emo_idx = torch.max(proba, dim=0)
                        emo_idx = emo_idx.item()
                        emo_proba = emo_proba.item()
                        emo_label = FER_2013_EMO_DICT[emo_idx]

                        label_size, base_line = cv2.getTextSize(
                            "{}: 000".format(emo_label),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
                        cv2.rectangle(
                            frame,
                            (end_x, start_y + 1 - label_size[1]),
                            (end_x + label_size[0], start_y + 1 + base_line),
                            (223, 128, 255),
                            cv2.FILLED,
                        )
                        cv2.putText(
                            frame,
                            "{} {}".format(emo_label, int(emo_proba * 100)),
                            (end_x, start_y + 1),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.8,
                            (0, 0, 0),
                            2,
                        )

                    cv2.imwrite(
                        frame_dir + '/' + str(cnt).zfill(5) + '.jpg', frame)
                    cnt += 1

            end_time = time.time()
            print("Spending time is %s s" % (end_time - start_time))
        finally:
            # Release the decoder even when the video is skipped or fails.
            vid.release()