def get_Cropped_face(image_path):
    """
    TODO: update to crop + align images
    summary: return a cropped and aligned frontal face image
    input: image path
    output: PIL image
    """
    img = Image.open(image_path)
    bounding_boxes, landmarks = detect_faces(img)
    show_bboxes(img, bounding_boxes, landmarks)
    # crop to the first detected face (x1, y1, x2, y2)
    return img.crop(tuple(bounding_boxes[0][:4]))
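# The TODO above asks for crop + align. Below is a minimal alignment sketch,
# assuming the detector returns five landmarks per face laid out as
# [x1..x5, y1..y5] with the two eyes first (typical for MTCNN-style
# detectors); get_aligned_face is a hypothetical name, not part of the repo.
import math


def get_aligned_face(image_path):
    img = Image.open(image_path)
    bounding_boxes, landmarks = detect_faces(img)

    # assumed layout: points 0 and 1 are the left and right eye
    lm = landmarks[0]
    left_eye = (lm[0], lm[5])
    right_eye = (lm[1], lm[6])

    # angle of the eye line with respect to the horizontal, in degrees
    dx = right_eye[0] - left_eye[0]
    dy = right_eye[1] - left_eye[1]
    angle = math.degrees(math.atan2(dy, dx))

    # rotate around the midpoint between the eyes, then re-detect and crop
    center = ((left_eye[0] + right_eye[0]) / 2,
              (left_eye[1] + right_eye[1]) / 2)
    rotated = img.rotate(angle, center=center, resample=Image.BILINEAR)
    bounding_boxes, _ = detect_faces(rotated)
    return rotated.crop(tuple(bounding_boxes[0][:4]))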
def face_detect(filename, save_file_name):
    """Detect faces in an image file, draw the results, and save the image."""
    img = Image.open(filename)
    bounding_boxes, landmarks = detect_faces(img)
    img_copy = show_bboxes(img, bounding_boxes, landmarks)
    img_copy.save(save_file_name)
    return img_copy
class Net(nn.Module):
    """ResNet backbone with a custom fully connected head that regresses
    212 values (presumably 106 (x, y) landmark pairs)."""

    def __init__(self, model):
        super(Net, self).__init__()
        # keep everything except the final pooling and fc layers
        self.resnet_layer = nn.Sequential(*list(model.children())[:-2])
        self.fc = nn.Linear(4608, 212)

    def forward(self, x):
        x = self.resnet_layer(x)
        x = x.view(x.size(0), -1)  # flatten the feature map
        x = self.fc(x)
        return x


img_path = sys.argv[1]
# img_path = '/home/lc/cy/hourglass-facekeypoints-detection/datasets/test/new_test/0130.jpg'
img = Image.open(img_path)
bounding_boxes, landmarks, flag = cal_landmark(img_path)

# change the landmark point order
new_landmarks = []
for p in landmarks:
    new_p = fchange(p)
    new_landmarks.append(new_p)
landmark = new_landmarks[0]

a = show_bboxes(img, bounding_boxes, new_landmarks)
a.show()
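# A hypothetical usage sketch for Net, assuming a torchvision ResNet-18
# backbone. The nn.Linear(4608, 212) head only matches if the flattened
# feature map has 4608 elements; with ResNet-18 that corresponds to
# 512 channels x 3 x 3, i.e. a 96x96 input image.
import torch
from torchvision import models

backbone = models.resnet18(pretrained=False)
net = Net(backbone)

dummy = torch.randn(1, 3, 96, 96)  # batch of one 96x96 RGB image
out = net(dummy)
print(out.shape)  # expected: torch.Size([1, 212])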
def main(args):
    videos_directory = args.videos_dir
    results_dir = args.results_dir
    vids_name = args.category
    vid_proc_name = args.log_file
    dataset_annotation_file = args.ann_file
    save_videos = args.save_videos == 'True'

    # Create video window
    cv2.namedWindow('Original')

    # Load or create the list of already processed files
    processed_files = []
    videos_processed_exists = os.path.isfile(
        os.path.join(results_dir, vid_proc_name))
    if not videos_processed_exists:
        with open(os.path.join(results_dir, vid_proc_name), "w") as fp:
            for pfiles in processed_files:
                print(pfiles, file=fp)
    else:
        with open(os.path.join(results_dir, vid_proc_name)) as fp:
            processed_files = fp.read().splitlines()

    # Create the annotation file the first time
    annotation_exists = os.path.isfile(
        os.path.join(results_dir, dataset_annotation_file))
    if not annotation_exists:
        try:
            with open(os.path.join(results_dir, dataset_annotation_file),
                      'w') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
                writer.writeheader()
        except IOError:
            print("Error creating annotation file. I/O error")

    # Get the list of json file names in the videos directory
    files_list = []
    for ann_file in os.listdir(os.path.join(videos_directory, vids_name)):
        if ann_file.endswith(".json"):
            files_list.append(ann_file[0:-5])

    files_list = natsorted(files_list)
    num_files = len(files_list)
    print('found', num_files, 'files')

    # Traverse all the files
    for file in files_list:
        # Skip videos that have already been processed
        if file in processed_files:
            print(file, 'has already been processed. Skipping it.')
            continue

        num_output_video = 0

        # Search for the video file in videos_directory
        video_name = file + '.mp4'
        print('Processing video:', video_name)

        if save_videos:
            # Create the output directory
            output_dir = os.path.join(results_dir, vids_name, file)
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)

        # Load Watson speech-to-text results
        with open(os.path.join(videos_directory, vids_name,
                               file + '.json')) as f:
            stt_results = json.load(f)

        # Extract all the words with confidence above 90%
        words_data = extract_words_from_watson_results(stt_results,
                                                       max_words=5)

        # Start the video capture
        cap = cv2.VideoCapture(
            os.path.join(videos_directory, vids_name, video_name))

        # Extract video metadata
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print('video resolution:', width, ' x ', height)
        print('video framerate:', fps)

        frame_count = 0
        fps_processing = 30.0  # fps holder
        t = cv2.getTickCount()  # initiate the tickCounter
        count = 0

        for entry in words_data:
            # Extract speech-to-text data
            print('entry:', type(entry), entry)
            s_sec, s_millisec = divmod(float(entry['start']), 1)
            e_sec, e_millisec = divmod(float(entry['end']), 1)
            s_min = 0
            e_min = 0
            s_millisec = s_millisec * 1000
            e_millisec = e_millisec * 1000

            print('s_sec, s_millisec:', s_sec, s_millisec)

            if s_sec >= 60:
                s_min = math.floor(s_sec / 60.0)
                s_sec = s_sec % 60
            if e_sec >= 60:
                e_min = math.floor(e_sec / 60.0)
                e_sec = e_sec % 60

            # Determine the video frames involved in the stt entry
            min_frame = s_min * fps * 60 + (s_sec * fps)
            max_frame = e_min * fps * 60 + (e_sec * fps)

            # Go to min_frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, min_frame)
            frame_count = min_frame

            # Read frames from min_frame to max_frame
            num_people = 0
            valid_video = True
            bbx1 = []
            bby1 = []
            bbx2 = []
            bby2 = []
            consecutive_frames_no_people = 0
            while frame_count < max_frame:
                if count == 0:
                    t = cv2.getTickCount()

                # Capture the next frame
                ret, frame = cap.read()
                if not ret:
                    # stop at end of stream (avoids an infinite loop)
                    break

                frame_count += 1

                # Resize the frame for faster processing
                if frame.shape[0] <= 0 or frame.shape[1] <= 0:
                    continue
                frame_small = cv2.resize(frame, (0, 0), fx=scale, fy=scale,
                                         interpolation=cv2.INTER_LINEAR)

                # Detect faces and landmarks
                bounding_boxes, landmarks = detect_faces(frame_small)
                num_people = bounding_boxes.shape[0]

                # Scale the detections back to the original frame size
                bounding_boxes /= scale
                landmarks /= scale

                # If it detects fewer or more than 1 person,
                # skip to the next subtitle
                if num_people != 1:
                    consecutive_frames_no_people += 1
                    if consecutive_frames_no_people >= max_bad_frames:
                        print(consecutive_frames_no_people,
                              ' frames without exactly 1 person. '
                              'Skipping to next subtitle')
                        valid_video = False
                        break

                # If there is exactly one person in the scene
                if num_people == 1:
                    consecutive_frames_no_people = 0

                    # Extract the bounding box
                    bb = bounding_boxes[0]
                    x1, y1 = int(bb[0]), int(bb[1])
                    x2, y2 = int(bb[2]), int(bb[3])

                    area = (x2 - x1) * (y2 - y1)
                    if area < min_area:
                        valid_video = False
                        break

                    # Save the bounding box coordinates for the final crop
                    bbx1.append(x1)
                    bbx2.append(x2)
                    bby1.append(y1)
                    bby2.append(y2)

                    # Draw the bounding box and landmarks on the original frame
                    frame = show_bboxes(frame, bounding_boxes, landmarks)

                # Put the fps at which we are processing the feed on the frame
                cv2.putText(frame, "{0:.2f}-fps".format(fps_processing),
                            (50, height - 50), cv2.FONT_HERSHEY_COMPLEX,
                            1, (0, 0, 255), 2)

                # Display the image
                cv2.imshow('Original', frame)

                # Read the keyboard and exit if ESC was pressed
                k = cv2.waitKey(1) & 0xFF
                if k == 27:
                    exit()
                elif k == ord('q'):
                    break

                # Increment the frame counter
                count = count + 1

                # Calculate the processing fps at an interval of 30 frames
                if count == 30:
                    t = (cv2.getTickCount() - t) / cv2.getTickFrequency()
                    fps_processing = 30.0 / t
                    count = 0

            # If this was a valid video
            if valid_video and len(bbx1) > 0:
                num_output_video += 1

                # Get the final crop coordinates
                bbx1 = np.amin(np.array(bbx1))
                bbx2 = np.amax(np.array(bbx2))
                bby1 = np.amin(np.array(bby1))
                bby2 = np.amax(np.array(bby2))
                bbw = bbx2 - bbx1
                bbh = bby2 - bby1

                entry['bounding_box'] = [bbx1, bby1, bbw, bbh]
                print('entry:', type(entry), entry)

                if save_videos:
                    s_hr = 0
                    e_hr = 0
                    if s_min >= 60:
                        s_hr = math.floor(s_min / 60)
                        s_min = s_min % 60
                    if e_min >= 60:
                        e_hr = math.floor(e_min / 60)
                        e_min = e_min % 60

                    # Cut and crop the video:
                    # ffmpeg -i input.mp4 -ss hh:mm:ss -filter:v crop=w:h:x:y
                    #        -c:a copy -to hh:mm:ss output.mp4
                    ss = "{0:02d}:{1:02d}:{2:02d}.{3:03d}".format(
                        s_hr, s_min, int(s_sec), math.ceil(s_millisec))
                    es = "{0:02d}:{1:02d}:{2:02d}.{3:03d}".format(
                        e_hr, e_min, int(e_sec), math.ceil(e_millisec))
                    crop = "crop={0:1d}:{1:1d}:{2:1d}:{3:1d}".format(
                        bbw, bbh, bbx1, bby1)

                    out_name = os.path.join(output_dir, str(num_output_video))
                    subprocess.call([
                        'ffmpeg',
                        #'-hide_banner', '-loglevel', 'panic',
                        '-i', os.path.join(videos_directory, vids_name,
                                           video_name),
                        '-ss', ss,
                        '-filter:v', crop, '-c:a', 'copy',
                        '-to', es, out_name + '.mp4'
                    ])

                    # Save the recognized speech
                    with open(out_name + '.txt', "w") as text_file:
                        text_file.write(entry['text'] + '\n')
                        text_file.write(str(entry['conf']))

        # Delete the entries without a bounding box
        words_data[:] = [
            dic for dic in words_data if len(dic['bounding_box']) > 0
        ]

        # Append the results to the annotation file
        append_annotation_file(
            os.path.join(results_dir, dataset_annotation_file), words_data)

        # Save the name of the processed file
        processed_files.append(file)
        with open(os.path.join(results_dir, vid_proc_name), "w") as fp:
            for p_file in processed_files:
                print(p_file, file=fp)

    # Release resources
    cap.release()
    cv2.destroyAllWindows()
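# extract_words_from_watson_results is not shown in this section. Judging by
# the fields the loop above reads ('start', 'end', 'text', 'conf', plus a
# 'bounding_box' that must start out empty so the final filter works), a
# sketch of it could look like the following. The response layout
# (results -> alternatives -> timestamps / word_confidence) is the documented
# Watson speech-to-text format, but names and the 0.9 threshold here are
# assumptions.
def extract_words_from_watson_results_sketch(stt_results, max_words=5,
                                             min_conf=0.9):
    entries = []
    for result in stt_results.get('results', []):
        alt = result['alternatives'][0]
        timestamps = alt.get('timestamps', [])        # [word, start, end]
        confidences = alt.get('word_confidence', [])  # [word, confidence]
        for i in range(0, len(timestamps), max_words):
            chunk_ts = timestamps[i:i + max_words]
            chunk_cf = confidences[i:i + max_words]
            if not chunk_cf or min(c for _, c in chunk_cf) < min_conf:
                continue
            entries.append({
                'text': ' '.join(w for w, _, _ in chunk_ts),
                'start': chunk_ts[0][1],
                'end': chunk_ts[-1][2],
                'conf': min(c for _, c in chunk_cf),
                'bounding_box': [],  # filled in later by the main loop
            })
    return entries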
t = cv2.getTickCount()
count = 0
while True:
    if count == 0:
        t = cv2.getTickCount()

    # Capture the next frame
    ret, frame = cap.read()
    if not ret:
        break
    # frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5,
    #                    interpolation=cv2.INTER_LINEAR)

    bounding_boxes, landmarks = detect_faces(frame)

    # Render the detections on the frame
    frame = show_bboxes(frame, bounding_boxes, landmarks)

    # Put the fps at which we are processing the camera feed on the frame
    cv2.putText(frame, "{0:.2f}-fps".format(fps), (50, height - 50),
                cv2.FONT_HERSHEY_COMPLEX, 1.5, (0, 0, 255), 3)

    # Display the image
    cv2.imshow('frame', frame)

    # Read the keyboard and exit if ESC was pressed
    k = cv2.waitKey(10) & 0xFF
    if k == 27:
        break

    # Increment the frame counter
    count = count + 1
from src import detect_faces, show_bboxes
import numpy as np
import cv2

img = cv2.imread('/home/juan/Pictures/office1.jpg', cv2.IMREAD_COLOR)
bounding_boxes, landmarks = detect_faces(img)
img = show_bboxes(img, bounding_boxes, landmarks)

cv2.imshow('Image', img)
cv2.waitKey(0)
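# Note: the webcam snippet below converts frames to PIL before calling
# detect_faces. If the detector expects a PIL RGB image rather than an
# OpenCV BGR array (an assumption about the API), this still-image example
# would need the same conversion:
from PIL import Image

img_bgr = cv2.imread('/home/juan/Pictures/office1.jpg', cv2.IMREAD_COLOR)
pil_img = Image.fromarray(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
bounding_boxes, landmarks = detect_faces(pil_img)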
cap = cv2.VideoCapture(0)

while True:
    try:
        # Capture frame-by-frame
        _, cv2_im = cap.read()
        cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
        pil_im = Image.fromarray(cv2_im)

        bounding_boxes, landmarks = detect_faces(pil_im)
        pil_result = show_bboxes(pil_im, bounding_boxes, landmarks)

        # Convert back to an OpenCV (BGR) image for display
        opencvImage = cv2.cvtColor(np.array(pil_result), cv2.COLOR_RGB2BGR)
        cv2.imshow('frame', opencvImage)
    except Exception:
        pass

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
def main():
    # Load the subtitles
    subs = pysrt.open('/home/juan/Videos/AMOR.es.srt', encoding='iso-8859-1')

    # Start the video capture
    cap = cv2.VideoCapture('/home/juan/Videos/AMOR.mp4')

    cv2.namedWindow('Original')
    cv2.namedWindow('Cropped')

    # Extract the number of subtitles
    num_subs = len(subs)
    print('Num subtitles:', num_subs)

    # Extract video metadata
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('video resolution:', width, ' x ', height)
    print('video framerate:', fps)

    cv2.waitKey(0)

    for s_idx, sub in enumerate(subs):
        s = "{0:1d}, {1:02d}:{2:02d} to {3:02d}:{4:02d} {5:s}"
        text = cleanhtml(sub.text)
        print(s.format(s_idx, sub.start.minutes, sub.start.seconds,
                       sub.end.minutes, sub.end.seconds, text))

        while True:
            # Capture the next frame
            ret, frame = cap.read()
            if not ret:
                break

            if PROCESS_VIDEO:
                # Resize the frame for faster processing
                frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5,
                                   interpolation=cv2.INTER_LINEAR)

                # Detect faces and landmarks
                bounding_boxes, landmarks = detect_faces(frame)

                # If only one face was detected
                if bounding_boxes.shape[0] == 1:
                    # Extract the bounding box
                    bb = bounding_boxes[0]
                    x1, y1, x2, y2 = int(bb[0]), int(bb[1]), int(bb[2]), int(bb[3])

                    # Crop the face
                    cropped = frame[y1:y2, x1:x2]
                    cv2.imshow('Cropped', cropped)

                # Draw the bounding box and landmarks on the original frame
                frame = show_bboxes(frame, bounding_boxes, landmarks)

            # Display the image
            cv2.imshow('Original', frame)

            # Read the keyboard; ESC advances to the next subtitle
            k = cv2.waitKey(10) & 0xFF
            if k == 27:
                break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()
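# cleanhtml is referenced above but not defined in this section. Subtitle
# text often carries simple formatting tags such as <i>...</i>; a minimal
# sketch of the helper (the regex approach is an assumption) could be:
import re


def cleanhtml(raw_html):
    # strip anything between angle brackets
    return re.sub(r'<.*?>', '', raw_html)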
from src import detect_faces, show_bboxes
from PIL import Image

image_1 = Image.open('test2.jpg')
bounding_boxes, landmarks = detect_faces(image_1)
image_2 = show_bboxes(image_1, bounding_boxes, landmarks)
image_2.show()

# Crop, resize, and save each detected face
i = 0  # used for naming
for b in bounding_boxes:
    region = (b[0], b[1], b[2], b[3])
    # crop from the original image so the drawn boxes don't end up in the cut
    face_cut = image_1.crop(region)
    face_cut = face_cut.resize((32, 32))
    face_cut.show()
    face_cut.save('face_cut_' + str(i) + '.jpg')
    i += 1
import base64
import json

import cv2
import numpy as np
import requests
from PIL import Image

from src import show_bboxes

cap = cv2.VideoCapture(0)
cv2.namedWindow('preview', cv2.WINDOW_GUI_EXPANDED)

while True:
    retval, image = cap.read()

    # Encode the frame as a base64 JPEG string
    retval, buffer = cv2.imencode('.jpg', image)
    jpg_as_text = base64.b64encode(buffer).decode('utf-8')

    try:
        # Send the frame to the face detection service
        headers = {'Content-Type': r"application/json"}
        data = {'jpg_as_text': jpg_as_text}
        response = requests.post(url='http://localhost:3000/api/detect-face',
                                 headers=headers, data=json.dumps(data))
        if response.status_code != 200:
            continue

        data = json.loads(response.text)
        bounding_boxes = data.get('bounding_boxes', [[]])
        landmarks = data.get('landmarks', [[]])

        PIL_image = Image.fromarray(image.astype('uint8'), 'RGB')
        image = show_bboxes(PIL_image, bounding_boxes, landmarks)
        image = np.array(image)
        cv2.imshow('preview', image)
    except requests.RequestException:
        # If the service is unavailable, show the raw frame
        cv2.imshow('preview', image)

    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
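# A hypothetical sketch of the service the client above talks to. The route
# and the payload field names mirror what the client sends; the use of Flask
# and everything else here is an assumption, not the repo's actual server.
import base64

import cv2
import numpy as np
from flask import Flask, jsonify, request
from PIL import Image

from src import detect_faces

app = Flask(__name__)


@app.route('/api/detect-face', methods=['POST'])
def detect_face():
    payload = request.get_json(force=True)

    # Decode the base64 JPEG back into a BGR array, then convert to PIL RGB
    jpg = base64.b64decode(payload['jpg_as_text'])
    image = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8),
                         cv2.IMREAD_COLOR)
    pil_im = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    bounding_boxes, landmarks = detect_faces(pil_im)
    return jsonify({'bounding_boxes': np.asarray(bounding_boxes).tolist(),
                    'landmarks': np.asarray(landmarks).tolist()})


if __name__ == '__main__':
    app.run(port=3000)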