def read_image_and_gt(img_files, gt_files, config):
    '''
    Transform images and return the transformed images with their encoded labels.

    :param img_files: list of image files including the path of a batch
    :param gt_files: list of gt files including the path of a batch
    :param config: config dict containing various hyperparameters
    :return: images and annotations
    '''

    labels = []
    bboxes = []
    deltas = []
    aidxs = []

    def load_annotation(gt_file):
        """Load annotations from a KITTI-style ground-truth file."""
        with open(gt_file, 'r') as f:
            lines = f.readlines()

        annotations = []

        # each line is an annotation bounding box
        for line in lines:
            obj = line.strip().split(' ')

            # get the class; if the class is not listed, skip the line
            try:
                cls = config.CLASS_TO_IDX[obj[0].lower().strip()]

                # get coordinates
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])

                # check for valid bounding boxes
                assert 0.0 <= xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}' \
                    .format(xmin, xmax, gt_file)
                assert 0.0 <= ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}' \
                    .format(ymin, ymax, gt_file)

                # transform to center point + width and height representation
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                annotations.append([x, y, w, h, cls])
            except (KeyError, IndexError, ValueError, AssertionError):
                # log the skipped annotation line
                print(obj)
                continue

        return annotations

    # init tensor of images
    imgs = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                     config.IMAGE_WIDTH, config.N_CHANNELS))

    img_idx = 0

    # iterate files
    for img_name, gt_name in zip(img_files, gt_files):

        # open image
        img = cv2.imread(img_name).astype(np.float32, copy=False)

        # scale image
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))

        # standardize image (zero mean, unit variance)
        img = (img - np.mean(img)) / np.std(img)

        # height and width after resizing; the box scaling below is
        # relative to these dimensions
        orig_h, orig_w, _ = [float(v) for v in img.shape]

        # load annotations
        annotations = load_annotation(gt_name)

        # split into classes and boxes
        labels_per_file = [a[4] for a in annotations]
        bboxes_per_file = np.array([a[0:4] for a in annotations])

        # dynamic data augmentation
        img = np.asarray(img)
        bboxes_per_file = [
            fix_bbox(*img.shape[:2], bbox) for bbox in bboxes_per_file
        ]
        annotations = {
            'image': img,
            'bboxes': bboxes_per_file,
            'labels': labels_per_file
        }
        augmented = _aug(config)(**annotations)
        img = augmented['image']
        bboxes_per_file = np.array(augmented['bboxes'])
        labels_per_file = augmented['labels']

        # and store
        imgs[img_idx] = img
        img_idx += 1

        # scale annotation
        x_scale = config.IMAGE_WIDTH / orig_w
        y_scale = config.IMAGE_HEIGHT / orig_h

        # scale boxes
        bboxes_per_file[:, 0::2] = bboxes_per_file[:, 0::2] * x_scale
        bboxes_per_file[:, 1::2] = bboxes_per_file[:, 1::2] * y_scale

        bboxes.append(bboxes_per_file)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()

        # iterate all bounding boxes for a file
        for i in range(len(bboxes_per_file)):

            # compute overlaps of bounding boxes and anchor boxes
            overlaps = batch_iou(config.ANCHOR_BOX, bboxes_per_file[i])

            # anchor box index
            aidx = len(config.ANCHOR_BOX)

            # sort for biggest overlaps
            for ov_idx in np.argsort(overlaps)[::-1]:
                # when overlap is zero break
                if overlaps[ov_idx] <= 0:
                    break
                # if an unassigned anchor is found, take it and break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    break

            # if the largest available overlap is 0, choose the anchor box
            # with the smallest square distance instead
            if aidx == len(config.ANCHOR_BOX):
                dist = np.sum(np.square(bboxes_per_file[i] - config.ANCHOR_BOX),
                              axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            # compute deltas for regression
            box_cx, box_cy, box_w, box_h = bboxes_per_file[i]
            delta = [0] * 4
            delta[0] = (box_cx - config.ANCHOR_BOX[aidx][0]) \
                / config.ANCHOR_BOX[aidx][2]
            delta[1] = (box_cy - config.ANCHOR_BOX[aidx][1]) \
                / config.ANCHOR_BOX[aidx][3]
            delta[2] = np.log(box_w / config.ANCHOR_BOX[aidx][2])
            delta[3] = np.log(box_h / config.ANCHOR_BOX[aidx][3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        deltas.append(delta_per_image)
        aidxs.append(aidx_per_image)
        labels.append(labels_per_file)

    # transform the batch annotations into a form we can feed into the model
    label_indices, bbox_indices, box_delta_values, mask_indices, box_values = \
        [], [], [], [], []

    aidx_set = set()

    # iterate batch
    for i in range(len(labels)):
        # and annotations
        for j in range(len(labels[i])):
            if (i, aidxs[i][j]) not in aidx_set:
                aidx_set.add((i, aidxs[i][j]))
                label_indices.append([i, aidxs[i][j], labels[i][j]])
                mask_indices.append([i, aidxs[i][j]])
                bbox_indices.extend([[i, aidxs[i][j], k] for k in range(4)])
                box_delta_values.extend(deltas[i][j])
                box_values.extend(bboxes[i][j])

    # transform them into dense matrices
    input_mask = np.reshape(
        sparse_to_dense(mask_indices, [config.BATCH_SIZE, config.ANCHORS],
                        [1.0] * len(mask_indices)),
        [config.BATCH_SIZE, config.ANCHORS, 1])

    box_delta_input = sparse_to_dense(bbox_indices,
                                      [config.BATCH_SIZE, config.ANCHORS, 4],
                                      box_delta_values)

    box_input = sparse_to_dense(bbox_indices,
                                [config.BATCH_SIZE, config.ANCHORS, 4],
                                box_values)

    labels = sparse_to_dense(
        label_indices, [config.BATCH_SIZE, config.ANCHORS, config.CLASSES],
        [1.0] * len(label_indices))

    # concatenate outputs
    Y = np.concatenate((input_mask, box_input, box_delta_input, labels),
                       axis=-1).astype(np.float32)

    return imgs, Y
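# `sparse_to_dense` is used above to scatter the sparse (image, anchor)
# assignments into dense batch tensors. Its implementation is not part of
# this section; the following is a minimal sketch of the semantics implied
# by the call sites (a list of index tuples, an output shape, and one value
# per index), not necessarily the project's actual implementation.
import numpy as np

def sparse_to_dense_sketch(indices, output_shape, values, default_value=0.0):
    """Write values[i] at position indices[i] of a dense array of shape
    output_shape that is otherwise filled with default_value."""
    dense = np.full(output_shape, default_value, dtype=np.float32)
    for idx, value in zip(indices, values):
        dense[tuple(idx)] = value
    return dense

# e.g. marking anchors 3 and 7 of image 0 as assigned in a (2, 9) mask:
# sparse_to_dense_sketch([[0, 3], [0, 7]], [2, 9], [1.0, 1.0])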
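# `batch_iou` (and `utils.batch_iou` further below) computes the IoU of one
# box against an array of boxes. The anchor matching above subtracts boxes
# from config.ANCHOR_BOX elementwise, so both are assumed to share the
# center format [cx, cy, w, h]; this sketch is illustrative under that
# assumption and is not necessarily the project's own implementation.
import numpy as np

def batch_iou_sketch(boxes, box):
    """IoU of box ([cx, cy, w, h]) with each row of boxes (N x 4)."""
    # overlap along x: right edge of the intersection minus its left edge,
    # clipped at zero
    lr = np.maximum(
        np.minimum(boxes[:, 0] + 0.5 * boxes[:, 2], box[0] + 0.5 * box[2]) -
        np.maximum(boxes[:, 0] - 0.5 * boxes[:, 2], box[0] - 0.5 * box[2]),
        0)
    # overlap along y: bottom edge of the intersection minus its top edge,
    # clipped at zero
    tb = np.maximum(
        np.minimum(boxes[:, 1] + 0.5 * boxes[:, 3], box[1] + 0.5 * box[3]) -
        np.maximum(boxes[:, 1] - 0.5 * boxes[:, 3], box[1] - 0.5 * box[3]),
        0)
    intersection = lr * tb
    union = boxes[:, 2] * boxes[:, 3] + box[2] * box[3] - intersection
    return intersection / union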
def read_image_and_gt_with_original(img_files, gt_files, config):
    '''
    Transform images and return the transformed images with their encoded
    labels, but also return the images only resized.

    :param img_files: list of image files including the path of a batch
    :param gt_files: list of gt files including the path of a batch
    :param config: config dict containing various hyperparameters
    :return: images and annotations
    '''

    labels = []
    bboxes = []
    deltas = []
    aidxs = []

    def load_annotation(gt_file):
        """Load annotations from a KITTI-style ground-truth file."""
        with open(gt_file, 'r') as f:
            lines = f.readlines()

        annotations = []

        # each line is an annotation bounding box
        for line in lines:
            obj = line.strip().split(' ')

            # get the class; if the class is not listed, skip the line
            try:
                cls = config.CLASS_TO_IDX[obj[0].lower().strip()]

                # get coordinates
                xmin = float(obj[4])
                ymin = float(obj[5])
                xmax = float(obj[6])
                ymax = float(obj[7])

                # check for valid bounding boxes
                assert 0.0 <= xmin <= xmax, \
                    'Invalid bounding box x-coord xmin {} or xmax {} at {}' \
                    .format(xmin, xmax, gt_file)
                assert 0.0 <= ymin <= ymax, \
                    'Invalid bounding box y-coord ymin {} or ymax {} at {}' \
                    .format(ymin, ymax, gt_file)

                # transform to center point + width and height representation
                x, y, w, h = bbox_transform_inv([xmin, ymin, xmax, ymax])
                annotations.append([x, y, w, h, cls])
            except (KeyError, IndexError, ValueError, AssertionError):
                continue

        return annotations

    imgs = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                     config.IMAGE_WIDTH, config.N_CHANNELS))
    imgs_only_resized = np.zeros((config.BATCH_SIZE, config.IMAGE_HEIGHT,
                                  config.IMAGE_WIDTH, config.N_CHANNELS))

    img_idx = 0

    # iterate files
    for img_name, gt_name in zip(img_files, gt_files):

        # open image
        img = cv2.imread(img_name).astype(np.float32, copy=False)

        # store original height and width
        orig_h, orig_w, _ = [float(v) for v in img.shape]

        # scale image
        img = cv2.resize(img, (config.IMAGE_WIDTH, config.IMAGE_HEIGHT))
        imgs_only_resized[img_idx] = img

        # standardize image (zero mean, unit variance)
        img = (img - np.mean(img)) / np.std(img)

        # load annotations
        annotations = load_annotation(gt_name)

        # split into classes and boxes
        labels_per_file = [a[4] for a in annotations]
        bboxes_per_file = np.array([a[0:4] for a in annotations])

        # TODO enable dynamic data augmentation (legacy SqueezeDet drift/flip
        # code, kept for reference):
        """
        if config.DATA_AUGMENTATION:
            assert mc.DRIFT_X >= 0 and mc.DRIFT_Y > 0, \
                'mc.DRIFT_X and mc.DRIFT_Y must be >= 0'

            if mc.DRIFT_X > 0 or mc.DRIFT_Y > 0:
                # ensure that the gt bounding box is not cut out of the image
                max_drift_x = min(gt_bbox[:, 0] - gt_bbox[:, 2] / 2.0 + 1)
                max_drift_y = min(gt_bbox[:, 1] - gt_bbox[:, 3] / 2.0 + 1)
                assert max_drift_x >= 0 and max_drift_y >= 0, 'bbox out of image'

                dy = np.random.randint(-mc.DRIFT_Y, min(mc.DRIFT_Y + 1, max_drift_y))
                dx = np.random.randint(-mc.DRIFT_X, min(mc.DRIFT_X + 1, max_drift_x))

                # shift bbox
                gt_bbox[:, 0] = gt_bbox[:, 0] - dx
                gt_bbox[:, 1] = gt_bbox[:, 1] - dy

                # distort image
                orig_h -= dy
                orig_w -= dx
                orig_x, dist_x = max(dx, 0), max(-dx, 0)
                orig_y, dist_y = max(dy, 0), max(-dy, 0)

                distorted_im = np.zeros(
                    (int(orig_h), int(orig_w), 3)).astype(np.float32)
                distorted_im[dist_y:, dist_x:, :] = im[orig_y:, orig_x:, :]
                im = distorted_im

            # flip image with 50% probability
            if np.random.randint(2) > 0.5:
                im = im[:, ::-1, :]
                gt_bbox[:, 0] = orig_w - 1 - gt_bbox[:, 0]
        """

        # and store
        imgs[img_idx] = np.asarray(img)
        img_idx += 1

        # scale annotation
        x_scale = config.IMAGE_WIDTH / orig_w
        y_scale = config.IMAGE_HEIGHT / orig_h

        # scale boxes
        bboxes_per_file[:, 0::2] = bboxes_per_file[:, 0::2] * x_scale
        bboxes_per_file[:, 1::2] = bboxes_per_file[:, 1::2] * y_scale

        bboxes.append(bboxes_per_file)

        aidx_per_image, delta_per_image = [], []
        aidx_set = set()

        # iterate all bounding boxes for a file
        for i in range(len(bboxes_per_file)):

            # compute overlaps of bounding boxes and anchor boxes
            overlaps = batch_iou(config.ANCHOR_BOX, bboxes_per_file[i])

            # anchor box index
            aidx = len(config.ANCHOR_BOX)

            # sort for biggest overlaps
            for ov_idx in np.argsort(overlaps)[::-1]:
                # when overlap is zero break
                if overlaps[ov_idx] <= 0:
                    break
                # if an unassigned anchor is found, take it and break
                if ov_idx not in aidx_set:
                    aidx_set.add(ov_idx)
                    aidx = ov_idx
                    break

            # if the largest available overlap is 0, choose the anchor box
            # with the smallest square distance instead
            if aidx == len(config.ANCHOR_BOX):
                dist = np.sum(np.square(bboxes_per_file[i] - config.ANCHOR_BOX),
                              axis=1)
                for dist_idx in np.argsort(dist):
                    if dist_idx not in aidx_set:
                        aidx_set.add(dist_idx)
                        aidx = dist_idx
                        break

            # compute deltas for regression
            box_cx, box_cy, box_w, box_h = bboxes_per_file[i]
            delta = [0] * 4
            delta[0] = (box_cx - config.ANCHOR_BOX[aidx][0]) \
                / config.ANCHOR_BOX[aidx][2]
            delta[1] = (box_cy - config.ANCHOR_BOX[aidx][1]) \
                / config.ANCHOR_BOX[aidx][3]
            delta[2] = np.log(box_w / config.ANCHOR_BOX[aidx][2])
            delta[3] = np.log(box_h / config.ANCHOR_BOX[aidx][3])

            aidx_per_image.append(aidx)
            delta_per_image.append(delta)

        deltas.append(delta_per_image)
        aidxs.append(aidx_per_image)
        labels.append(labels_per_file)

    # transform the batch annotations into a form we can feed into the model
    label_indices, bbox_indices, box_delta_values, mask_indices, box_values = \
        [], [], [], [], []

    aidx_set = set()

    # iterate batch
    for i in range(len(labels)):
        # and annotations
        for j in range(len(labels[i])):
            if (i, aidxs[i][j]) not in aidx_set:
                aidx_set.add((i, aidxs[i][j]))
                label_indices.append([i, aidxs[i][j], labels[i][j]])
                mask_indices.append([i, aidxs[i][j]])
                bbox_indices.extend([[i, aidxs[i][j], k] for k in range(4)])
                box_delta_values.extend(deltas[i][j])
                box_values.extend(bboxes[i][j])

    # transform them into dense matrices
    input_mask = np.reshape(
        sparse_to_dense(mask_indices, [config.BATCH_SIZE, config.ANCHORS],
                        [1.0] * len(mask_indices)),
        [config.BATCH_SIZE, config.ANCHORS, 1])

    box_delta_input = sparse_to_dense(bbox_indices,
                                      [config.BATCH_SIZE, config.ANCHORS, 4],
                                      box_delta_values)

    box_input = sparse_to_dense(bbox_indices,
                                [config.BATCH_SIZE, config.ANCHORS, 4],
                                box_values)

    labels = sparse_to_dense(
        label_indices, [config.BATCH_SIZE, config.ANCHORS, config.CLASSES],
        [1.0] * len(label_indices))

    # concatenate outputs
    Y = np.concatenate((input_mask, box_input, box_delta_input, labels),
                       axis=-1).astype(np.float32)

    return imgs, Y, imgs_only_resized
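# Both loaders encode each ground-truth box relative to its matched anchor
# as (dx, dy, log dw, log dh). The inverse transform makes the encoding
# concrete; this helper is hypothetical (decoding happens elsewhere in the
# project) and only mirrors the delta computation above.
import numpy as np

def decode_deltas_sketch(anchors, deltas):
    """Invert the delta encoding: anchors and deltas are N x 4 arrays;
    returns N x 4 center-format boxes [cx, cy, w, h]."""
    boxes = np.empty_like(anchors, dtype=np.float32)
    boxes[:, 0] = anchors[:, 0] + deltas[:, 0] * anchors[:, 2]  # cx
    boxes[:, 1] = anchors[:, 1] + deltas[:, 1] * anchors[:, 3]  # cy
    boxes[:, 2] = anchors[:, 2] * np.exp(deltas[:, 2])          # w
    boxes[:, 3] = anchors[:, 3] * np.exp(deltas[:, 3])          # h
    return boxes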
def compute_statistics_for_thresholding(all_boxes, all_classes, all_scores,
                                        all_gts, config):
    """Compute true positives, false positives, false negatives and score
    thresholds for computing APs.

    Arguments:
        all_boxes {list} -- list of predicted boxes
        all_classes {list} -- list of predicted classes
        all_scores {list} -- list of predicted scores
        all_gts {list} -- list of all y_trues
        config {object} -- squeezedet config

    Returns:
        boxes_per_img, boxes_per_gt, np.stack(all_tps), np.stack(all_fps),
        np.stack(all_fns), is_gt, all_score_thresholds
    """

    boxes_per_img = []
    boxes_per_gt = []
    all_tps = []
    all_fps = []
    all_fns = []
    all_score_thresholds = [[] for c in range(config.CLASSES)]
    is_gt = [[] for c in range(config.CLASSES)]

    # here we compute the false positives, false negatives and true positives
    # of the network predictions; we cannot do everything in a numpy array,
    # as each image has a different number of filtered detections

    # iterate all batches
    for i in range(len(all_boxes)):

        batch_gt = all_gts[i]
        batch_classes = all_classes[i]
        batch_scores = all_scores[i]

        # shape is batch_size * anchors * x
        box_input = batch_gt[:, :, 1:5]
        labels = batch_gt[:, :, 9:]

        # iterate images per batch for image level analysis
        for j in range(len(all_boxes[i])):

            # add number of detections
            boxes_per_img.append(len(all_boxes[i][j]))

            # get index of non zero boxes
            non_zero_idx = np.sum(box_input[j][:], axis=-1) > 0

            # get non zero gt boxes
            nonzero_gts = np.reshape(box_input[j][non_zero_idx], [-1, 4])

            # add number of gt boxes
            boxes_per_gt.append(len(nonzero_gts))

            # get labels
            labels_per_image = labels[j]

            # get non zero labels (class index of each one-hot gt row)
            nonzero_labels = [
                cls_idx
                for row in labels_per_image[non_zero_idx, :].astype(int)
                for cls_idx, value in enumerate(row)
                if value == 1
            ]

            # for every class count the true positives, false positives
            # and false negatives
            tp_per_image = np.zeros(config.CLASSES)
            fp_per_image = np.zeros(config.CLASSES)
            fn_per_image = np.zeros(config.CLASSES)

            # use this to check if a predicted box has already been assigned
            # to a different gt
            assigned_idx = np.zeros_like(batch_classes[j])

            # for every gt per image compute overlaps with detections
            for k in range(len(nonzero_gts)):

                try:
                    # get overlap between gt box and all predictions
                    ious = utils.batch_iou(np.stack(all_boxes[i][j]),
                                           nonzero_gts[k])

                    # use this to check for the biggest score
                    current_score = -1
                    # index of the best detection
                    current_idx = -1

                    # iterate all the ious
                    for iou_index, iou in enumerate(ious):

                        # check if the iou is above the threshold, the
                        # classes match, the box has not been assigned
                        # before and the score is bigger than the current
                        # best score; if all conditions are satisfied, mark
                        # this as the current best detection
                        if iou > config.IOU_THRESHOLD \
                                and batch_classes[j][iou_index] == nonzero_labels[k] \
                                and not assigned_idx[iou_index] \
                                and batch_scores[j][iou_index] > current_score:

                            # update current score
                            current_score = batch_scores[j][iou_index]
                            # update idx of best
                            current_idx = iou_index

                    # if nothing was assigned to this gt, add a false negative
                    if current_score < 0:
                        fn_per_image[nonzero_labels[k]] += 1
                        # for mAP calc mark this as a gt
                        is_gt[nonzero_labels[k]].append(1)
                        # append 0 as the score, as we did not detect it
                        all_score_thresholds[nonzero_labels[k]].append(0)
                    else:
                        # otherwise add a true positive for the corresponding
                        # class
                        tp_per_image[nonzero_labels[k]] += 1
                        # ignore the assigned box from now on
                        assigned_idx[current_idx] = 1
                        # mark it as a gt
                        is_gt[nonzero_labels[k]].append(1)
                        # save the score threshold
                        all_score_thresholds[nonzero_labels[k]].append(
                            current_score)

                except (ValueError, IndexError):
                    # happens when the network predicted no boxes at all;
                    # every gt of this class counts as a miss
                    fn_per_image[nonzero_labels[k]] += 1

            # calculate false positives, that is boxes that have not been
            # assigned to a gt
            for index, ai in enumerate(assigned_idx):
                # if the box has not been assigned
                if ai == 0:
                    # add a false positive to the corresponding class
                    fp_per_image[batch_classes[j][index]] += 1
                    # mark this as a non gt
                    is_gt[batch_classes[j][index]].append(0)
                    # append the predicted score to the predicted class
                    all_score_thresholds[batch_classes[j][index]].append(
                        batch_scores[j][index])

            all_tps.append(tp_per_image)
            all_fns.append(fn_per_image)
            all_fps.append(fp_per_image)

    return boxes_per_img, boxes_per_gt, np.stack(all_tps), \
        np.stack(all_fps), np.stack(all_fns), is_gt, all_score_thresholds
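# The stacked tp/fp/fn counts returned above feed the AP and score-threshold
# analysis downstream. As a usage sketch (this reduction helper is
# hypothetical, not part of this module), per-class precision and recall
# over an evaluation run can be computed like this:
import numpy as np

def precision_recall_sketch(all_tps, all_fps, all_fns, epsilon=1e-7):
    """Reduce per-image counts of shape (num_images, num_classes) to
    per-class precision and recall."""
    tp = np.sum(all_tps, axis=0)
    fp = np.sum(all_fps, axis=0)
    fn = np.sum(all_fns, axis=0)
    precision = tp / (tp + fp + epsilon)
    recall = tp / (tp + fn + epsilon)
    return precision, recall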