def clean_pointing04(pointing04_dir, destination_dir, detector, confidence_threshold, out_size,
                     grayscale=False, interpolation=cv2.INTER_LINEAR, start_count=0, duplicate_until=0):
    '''
    Performs the basic processing of the Pointing'04 dataset, obtaining cropped pictures for each
    head detection in the original images, and getting the ground truth pose values for each
    detection from the picture filename.

    Arguments:
        pointing04_dir: Directory containing Pointing'04 dataset pictures in its root
            (one "PersonNN" sub-directory per subject).
        destination_dir: Directory where cropped pictures and the labels.csv containing pose
            values for each crop will be stored. NOTE(review): assumed to end with a path
            separator — paths are built by plain concatenation.
        detector: Keras model used for detecting human heads in pictures.
        confidence_threshold: Value used to filter detections (detections must have a confidence
            value higher than this value in order to be considered valid).
        out_size: Length of each side of final cropped pictures.
        grayscale: If True, crops are converted to single-channel grayscale.
        interpolation: OpenCV interpolation flag used when resizing crops.
        start_count: Initial value of the picture count; used to assign filenames to cropped
            pictures when the AFLW dataset is not processed first.
        duplicate_until: Target number of pictures per class, used to augment the dataset by
            duplicating pictures in each class. A value of -1 means "scale every class so the
            total matches start_count"; each distinct (tilt, pan) pose is one class.

    Returns:
        count: Total number of cropped pictures obtained (starting from start_count + 1).
        t_ratio: Ratio between the number of true detections and the number of annotated heads
            in the dataset (0 if no pictures were processed).
        f_ratio: Ratio between the number of false detections and the total number of detections
            (0 if there were no detections).
    '''
    # Initialize count.
    count = start_count

    # If labels.csv exists in destination dir, append pose values; otherwise create it with a header.
    if os.path.isfile(destination_dir + 'labels.csv'):
        labels_file = open(destination_dir + 'labels.csv', 'a')
    else:
        labels_file = open(destination_dir + 'labels.csv', 'w')
        labels_file.write('file,tilt,pan\n')

    # BUGFIX: the original never closed the labels file; try/finally guarantees the handle is
    # released even if detection or disk I/O raises.
    try:
        # Counters: true detections, processed annotated heads, false detections, total detections.
        t_count = 0
        p_count = 0
        f_count = 0
        d_count = 0

        # One bucket per pose class: 13 tilt values x 13 pan values = 169 classes.
        pics_by_class = [[] for _ in range(169)]

        # Expected shape of a valid crop (2-D when grayscale, 3-channel otherwise).
        out_shape = (out_size, out_size) if grayscale else (out_size, out_size, 3)

        # For each of the 15 subjects in the dataset:
        for person in range(1, 16):
            num = '{0:02}'.format(person)
            path = pointing04_dir + "Person" + num
            images = glob.glob(path + "/*.jpg")

            # For each picture of that subject:
            for img in images:
                p_count = p_count + 1

                # Ground truth pose is encoded in the Pointing'04 filename.
                tilt, pan = pose_from_filename(img)

                pic = cv2.imread(img)
                bboxes = get_head_bboxes(pic, detector, confidence_threshold)
                d_count = d_count + len(bboxes)

                # Each Pointing'04 picture contains exactly one annotated head, so at most one
                # detection can be true; every extra detection is counted as a false positive.
                if len(bboxes) > 1:
                    f_count = f_count + len(bboxes) - 1
                    t_count = t_count + 1
                else:
                    t_count = t_count + len(bboxes)

                if grayscale:
                    pic = cv2.cvtColor(pic, cv2.COLOR_BGR2GRAY)
                c_pics = get_cropped_pics(pic, bboxes, out_size, 0, cropping='small',
                                          interpolation=interpolation)

                # For each cropped picture with the expected shape:
                for c_pic in c_pics:
                    if c_pic.shape == out_shape:
                        # Store the crop and its pose.
                        cv2.imwrite(destination_dir + "pic_" + str(count) + ".jpg", c_pic)
                        labels_file.write("pic_" + str(count) + ".jpg," + str(tilt) + "," + str(pan) + "\n")

                        # Class index: 13 * tilt-bin + pan-bin (15-degree bins, origin at -90).
                        p_class = int(13 * ((tilt + 90) / 15) + ((pan + 90) / 15))
                        pics_by_class[p_class].append(count)

                        # Horizontally mirrored copy: pan changes sign, tilt is unchanged.
                        pan = -1 * pan
                        c_pic = cv2.flip(c_pic, 1)
                        cv2.imwrite(destination_dir + "pic_" + str(count + 1) + ".jpg", c_pic)
                        labels_file.write("pic_" + str(count + 1) + ".jpg," + str(tilt) + "," + str(pan) + "\n")

                        # Class for the mirrored pose.
                        p_class = int(13 * ((tilt + 90) / 15) + ((pan + 90) / 15))
                        pics_by_class[p_class].append(count + 1)

                        count = count + 2
                        print("Count:", count)

        # Duplication stage: only runs when this call actually produced crops.
        if (count - start_count) > 0:
            '''
            If duplicate_until has a value of -1, the pictures in the dataset are duplicated until
            the number of cropped pictures matches start_count; otherwise duplicate_until is the
            target number of pictures per class.
            '''
            # BUGFIX: the original computed this ratio before checking (count - start_count) > 0,
            # raising ZeroDivisionError whenever no crops were obtained.
            if duplicate_until == -1:
                increase = start_count / (count - start_count)
            else:
                target_len = duplicate_until

            for i in range(len(pics_by_class)):
                # Only classes that already have at least one picture can be duplicated.
                if pics_by_class[i]:
                    if duplicate_until == -1:
                        # Per-class target scales with the class's current share of the crops.
                        target_len = int(len(pics_by_class[i]) * increase)

                    if len(pics_by_class[i]) < target_len:
                        # Recover pose values from the class index (inverse of the binning above).
                        tilt = int(i / 13) * 15 - 90
                        pan = int(i % 13) * 15 - 90

                        # Duplicate pictures round-robin until the target size is reached.
                        for j in range(target_len - len(pics_by_class[i])):
                            shutil.copyfile(
                                destination_dir + "pic_" + str(pics_by_class[i][int(j % len(pics_by_class[i]))]) + ".jpg",
                                destination_dir + "pic_" + str(count) + ".jpg")
                            labels_file.write("pic_" + str(count) + ".jpg," + str(tilt) + "," + str(pan) + "\n")
                            count = count + 1
                            print("Count:", count)
    finally:
        labels_file.close()

    # BUGFIX: guard both ratios against empty datasets (original raised ZeroDivisionError when
    # no pictures were processed or no heads were detected).
    t_ratio = t_count / p_count if p_count > 0 else 0
    f_ratio = f_count / d_count if d_count > 0 else 0

    # Return number of cropped pictures obtained (starting from start_count + 1), t_ratio and f_ratio.
    return count, t_ratio, f_ratio
# Try to get a frame from the camera. out, img = cam.read() # If there is no frame, exit. if out == False: break # Flip picture if needed. if flip == 'Y': img = cv2.flip(img, 1) # Get bounding boxes for every detected head in the picture. detection_start = datetime.now() bboxes = get_head_bboxes(img, head_detector, confidence_threshold) detection_end = datetime.now() # Calculate time used in head detection. detection_time = (detection_end - detection_start).total_seconds() # Get cropped pics for every valid bounding box. gray_pic = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) heads = get_cropped_pics(gray_pic, bboxes, in_size_estimator, 0, cropping='small') # Initialize head counter. head_count = 0 # Reset estimation time. estimation_time = 0
def clean_aflw(aflw_dir, aflw_mat, destination_dir, detector, confidence_threshold, out_size,
               grayscale=False, interpolation=cv2.INTER_LINEAR, start_count=0):
    '''
    Performs the basic processing of the AFLW dataset, obtaining cropped pictures for each head
    detection in the original images, and getting the ground truth pose values for each detection
    from the .mat file.

    Arguments:
        aflw_dir: Directory containing AFLW dataset pictures in its root.
        aflw_mat: Path to the .mat file containing pose values for each annotated head.
        destination_dir: Directory where cropped pictures and the labels.csv containing pose
            values for each crop will be stored. NOTE(review): assumed to end with a path
            separator — paths are built by plain concatenation.
        detector: Keras model used for detecting human heads in pictures.
        confidence_threshold: Value used to filter detections (detections must have a confidence
            value higher than this value in order to be considered valid).
        out_size: Length of each side of final cropped pictures.
        grayscale: If True, crops are converted to single-channel grayscale.
        interpolation: OpenCV interpolation flag used when resizing crops.
        start_count: Initial value of the picture count; used to assign filenames to cropped
            pictures when the AFLW dataset is not processed first.

    Returns:
        count: Total number of cropped pictures obtained (starting from start_count + 1).
        t_ratio: Ratio between the number of true detections and the number of annotated heads
            in the dataset (0 if no annotations were processed).
        f_ratio: Ratio between the number of false detections and the total number of detections
            (0 if there were no detections).
    '''
    # Load the .mat annotations and sort them by file id so that all heads belonging to the same
    # picture are adjacent in `values`.
    mat = loadmat(aflw_mat)
    values = sorted(zip(mat['fileids'], mat['bboxes'], mat['pose']), key=lambda x: x[0])

    # Initialize count.
    count = start_count

    # If labels.csv exists in destination dir, append pose values; otherwise create it with a header.
    if os.path.isfile(destination_dir + 'labels.csv'):
        labels_file = open(destination_dir + 'labels.csv', 'a')
    else:
        labels_file = open(destination_dir + 'labels.csv', 'w')
        labels_file.write('file,tilt,pan\n')

    # BUGFIX: the original never closed the labels file; try/finally guarantees the handle is
    # released even if detection or disk I/O raises.
    try:
        # Explicit iterator over the annotation tuples so tuples grouped by file id can be
        # consumed in advance (see the deque/islice call below).
        iterator = range(len(values)).__iter__()

        # Counters: true detections, processed annotated heads, false detections, total detections.
        t_count = 0
        p_count = 0
        f_count = 0
        d_count = 0

        # Expected shape of a valid crop (2-D when grayscale, 3-channel otherwise).
        out_shape = (out_size, out_size) if grayscale else (out_size, out_size, 3)

        # For every annotated head in the dataset:
        for tuple_index in iterator:
            p_count = p_count + 1

            # Load the picture containing the annotated head.
            pic = cv2.imread(aflw_dir + values[tuple_index][0].strip())
            detected_bboxes = get_head_bboxes(pic, detector, confidence_threshold)
            d_count = d_count + len(detected_bboxes)

            if grayscale:
                pic = cv2.cvtColor(pic, cv2.COLOR_BGR2GRAY)
            c_pics = get_cropped_pics(pic, detected_bboxes, out_size, 0, cropping='small',
                                      interpolation=interpolation)

            # Ground truth boxes/poses start with the current tuple; every following tuple with
            # the same file id belongs to the same picture and is merged in.
            true_bboxes = [values[tuple_index][1]]
            poses = [values[tuple_index][2]]
            i = 1
            # Renamed from `id` to avoid shadowing the builtin.
            file_id = values[tuple_index][0]
            while (tuple_index + i) < len(mat['fileids']) and values[tuple_index + i][0] == file_id:
                p_count = p_count + 1
                true_bboxes.append(values[tuple_index + i][1])
                poses.append(values[tuple_index + i][2])
                i = i + 1

            # Consume the already-merged tuples from the iterator (zero-length deque idiom) so
            # the outer loop does not process the same picture again.
            collections.deque(itertools.islice(iterator, i - 1))

            # Update detection counters: detections beyond the annotated heads are false positives.
            if len(detected_bboxes) > len(true_bboxes):
                f_count = f_count + len(detected_bboxes) - len(true_bboxes)
                t_count = t_count + len(true_bboxes)
            else:
                t_count = t_count + len(detected_bboxes)

            # If there are cropped pictures:
            if c_pics:
                # Match detected bounding boxes with ground truth bounding boxes.
                indexes = bbox_match(detected_bboxes, true_bboxes)

                # For each matching:
                for box_index in range(len(indexes)):
                    # Only keep valid matchings whose crop has the expected shape.
                    if indexes[box_index] != -1 and c_pics[box_index].shape == out_shape:
                        # Pose values come from the matched ground truth head (radians -> degrees).
                        tilt = degrees(poses[indexes[box_index]][1])
                        pan = degrees(poses[indexes[box_index]][2])
                        c_pic = c_pics[box_index]

                        # Store the crop and its pose.
                        cv2.imwrite(destination_dir + "pic_" + str(count) + ".jpg", c_pic)
                        labels_file.write("pic_" + str(count) + ".jpg," + str(tilt) + "," + str(pan) + "\n")

                        # Horizontally mirrored copy: pan changes sign, tilt is unchanged.
                        pan = -1 * pan
                        c_pic = cv2.flip(c_pic, 1)
                        cv2.imwrite(destination_dir + "pic_" + str(count + 1) + ".jpg", c_pic)
                        labels_file.write("pic_" + str(count + 1) + ".jpg," + str(tilt) + "," + str(pan) + "\n")

                        count = count + 2
                        print("Count:", count)
    finally:
        labels_file.close()

    # BUGFIX: guard both ratios against empty datasets (original raised ZeroDivisionError when
    # the .mat contained no annotations or no heads were detected).
    t_ratio = t_count / p_count if p_count > 0 else 0
    f_ratio = f_count / d_count if d_count > 0 else 0

    # Return number of cropped pictures obtained (starting from start_count + 1), t_ratio and f_ratio.
    return count, t_ratio, f_ratio