def validate(self):
    self.model.eval()
    total_loss = []
    pred_label = []
    target_label = []
    with torch.no_grad():
        for step, (image, label) in enumerate(self.val_data_loader):
            batch = self.batch_to_device(image, label)
            outputs = self.model(batch['img'])
            loss = self.criterion(outputs, batch['label'])
            total_loss.append(loss.item())
            pred_label.append(outputs)
            target_label.append(label)
            del outputs
            del loss
    total_loss = np.mean(total_loss)
    self.model.train()
    return total_loss, compute_iou(pred_label, target_label)
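# NOTE: `compute_iou` is called throughout these snippets but never defined in
# the excerpts. For the segmentation-style call above (lists of predicted
# logits and target masks), here is a minimal sketch; it assumes binary masks
# of matching shape, and the signature is hypothetical, not the repo's actual
# helper:
import torch

def compute_iou(pred_label, target_label, threshold=0.5, eps=1e-7):
    """Binary IoU over lists of predicted logits and target masks (sketch)."""
    preds = torch.cat(pred_label, dim=0).sigmoid() > threshold
    targets = torch.cat(target_label, dim=0).to(preds.device).bool()
    intersection = (preds & targets).float().sum()
    union = (preds | targets).float().sum()
    return (intersection / (union + eps)).item()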
def eval_mesh(self, mesh, pointcloud_tgt, normals_tgt, points_iou, occ_tgt):
    ''' Evaluates a mesh.

    Args:
        mesh (trimesh): mesh which should be evaluated
        pointcloud_tgt (numpy array): target point cloud
        normals_tgt (numpy array): target normals
        points_iou (numpy array): points tensor for IoU evaluation
        occ_tgt (numpy array): GT occupancy values for IoU points
    '''
    if len(mesh.vertices) != 0 and len(mesh.faces) != 0:
        pointcloud, idx = mesh.sample(self.n_points, return_index=True)
        pointcloud = pointcloud.astype(np.float32)
        normals = mesh.face_normals[idx]
    else:
        pointcloud = np.empty((0, 3))
        normals = np.empty((0, 3))

    out_dict = self.eval_pointcloud(pointcloud, pointcloud_tgt, normals, normals_tgt)

    if len(mesh.vertices) != 0 and len(mesh.faces) != 0:
        occ = check_mesh_contains(mesh, points_iou)
        out_dict['iou'] = compute_iou(occ, occ_tgt)
    else:
        out_dict['iou'] = 0.

    return out_dict
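# NOTE: the occupancy-grid `compute_iou(occ, occ_tgt)` used here (and again in
# the `eval` function further down) is not defined in these excerpts. A minimal
# sketch consistent with these call sites, assuming probability or boolean
# arrays where values >= 0.5 count as occupied:
import numpy as np

def compute_iou(occ1, occ2):
    """Volumetric IoU between two occupancy arrays (sketch)."""
    occ1 = np.asarray(occ1)
    occ2 = np.asarray(occ2)
    # Flatten any trailing dimensions so the reduction runs over points.
    if occ1.ndim >= 2:
        occ1 = occ1.reshape(occ1.shape[0], -1)
    if occ2.ndim >= 2:
        occ2 = occ2.reshape(occ2.shape[0], -1)
    occ1 = occ1 >= 0.5
    occ2 = occ2 >= 0.5
    area_union = (occ1 | occ2).astype(np.float64).sum(axis=-1)
    area_intersect = (occ1 & occ2).astype(np.float64).sum(axis=-1)
    return area_intersect / np.maximum(area_union, 1e-8)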
def test(net, epoch, data_loader, result_file, config, device):
    net.eval()
    if config.DEVICE.find('cuda') != -1:
        torch.cuda.empty_cache()  # free cached GPU memory
    iou = {
        "TP": {i: 0 for i in range(8)},
        "TA": {i: 0 for i in range(8)}
    }
    total_loss = 0.0
    dataprocess = tqdm(data_loader)
    for batch_item in dataprocess:
        image, mask = batch_item['image'], batch_item['mask']
        image = image.to(device)
        mask = mask.to(device)
        out = net(image)
        out = F.softmax(out, dim=1)
        loss = utils.create_loss(out, mask, config.NUM_CLASSES)
        total_loss += loss.item()
        # accumulate the per-class confusion-matrix counts
        iou = utils.compute_iou(out, mask, iou)
        dataprocess.set_description_str("epoch:{}".format(epoch))
        dataprocess.set_postfix_str("mask_loss:{:.4f}".format(loss.item()))
    result_file.write("Epoch:{} \n".format(epoch))
    for i in range(8):
        result_string = "{}: {:.4f} \n".format(i, iou["TP"][i] / iou["TA"][i])
        print(result_string)
        result_file.write(result_string)
    # message
    info = "Epoch:{}, mean loss is {:.4f} \n".format(
        epoch, total_loss / len(data_loader))
    result_file.write(info)
    result_file.flush()
def encode_label(image, gt_boxes):
    target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
    target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
    target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1
    for i in range(45):  # y: height
        for j in range(60):  # x: width
            for k in range(9):
                center_x = j * grid_width + grid_width * 0.5
                center_y = i * grid_height + grid_height * 0.5
                xmin = center_x - wandhG[k][0] * 0.5
                ymin = center_y - wandhG[k][1] * 0.5
                xmax = center_x + wandhG[k][0] * 0.5
                ymax = center_y + wandhG[k][1] * 0.5
                # print(xmin, ymin, xmax, ymax)
                # ignore cross-boundary anchors
                if (xmin > -5) & (ymin > -5) & (xmax < (image_width + 5)) & (ymax < (image_height + 5)):
                    anchor_boxes = np.array([xmin, ymin, xmax, ymax])
                    anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
                    # compute iou between this anchor and all ground-truth boxes in the image
                    ious = compute_iou(anchor_boxes, gt_boxes)
                    positive_masks = ious > pos_thresh
                    negative_masks = ious < neg_thresh
                    if np.any(positive_masks):
                        plot_boxes_on_image(image, anchor_boxes, thickness=1)
                        print("=> encode: %d, %d, %d" % (i, j, k))
                        cv2.circle(image,
                                   center=(int(0.5 * (xmin + xmax)), int(0.5 * (ymin + ymax))),
                                   radius=1, color=[255, 0, 0], thickness=4)
                        target_scores[i, j, k, 1] = 1.
                        target_masks[i, j, k] = 1  # labeled as a positive sample
                        # find out which ground-truth box matches this anchor
                        max_iou_idx = np.argmax(ious)
                        selected_gt_boxes = gt_boxes[max_iou_idx]
                        target_bboxes[i, j, k] = compute_regression(selected_gt_boxes, anchor_boxes[0])
                    if np.all(negative_masks):
                        target_scores[i, j, k, 0] = 1.
                        target_masks[i, j, k] = -1  # labeled as a negative sample
                        cv2.circle(image,
                                   center=(int(0.5 * (xmin + xmax)), int(0.5 * (ymin + ymax))),
                                   radius=1, color=[0, 0, 0], thickness=4)
    Image.fromarray(image).show()
    return target_scores, target_bboxes, target_masks
def compute_target(self, anchors, box):
    regression_label = box_transform(anchors, box)
    iou = compute_iou(anchors, box).flatten()
    pos_index = np.where(iou > config.pos_threshold)[0]
    neg_index = np.where(iou < config.neg_threshold)[0]
    classification_label = np.ones_like(iou, dtype=np.float32) * -1
    classification_label[pos_index] = 1
    classification_label[neg_index] = 0
    return regression_label, classification_label
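# NOTE: several snippets (`compute_target`, `create_response_label`, `get_abox_idx`,
# and the NMS loop near the end) rely on a box-vs-boxes `compute_iou` that is not
# shown. A minimal NumPy sketch that broadcasts over either argument, assuming
# corner-format [x1, y1, x2, y2] boxes; some of these trackers use centre/size
# boxes instead, so treat the format as an illustrative assumption:
import numpy as np

def compute_iou(boxes_a, boxes_b):
    """IoU with broadcasting: accepts [4] or [N, 4] on either side (sketch)."""
    a = np.atleast_2d(np.asarray(boxes_a, dtype=np.float64))
    b = np.atleast_2d(np.asarray(boxes_b, dtype=np.float64))
    ix1 = np.maximum(a[:, None, 0], b[None, :, 0])
    iy1 = np.maximum(a[:, None, 1], b[None, :, 1])
    ix2 = np.minimum(a[:, None, 2], b[None, :, 2])
    iy2 = np.minimum(a[:, None, 3], b[None, :, 3])
    inter = np.maximum(ix2 - ix1, 0) * np.maximum(iy2 - iy1, 0)
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    union = area_a[:, None] + area_b[None, :] - inter
    return np.squeeze(inter / np.maximum(union, 1e-10))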
def test(cfg, model, post_processor, criterion, device, test_loader):
    """
    Return:
        a validation metric between 0-1 where 1 is perfect
    """
    model.eval()
    post_processor.eval()
    test_loss = 0
    correct = 0
    # TODO: use a more consistent evaluation interface
    pixel_acc_list = []
    iou_list = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            feature = model(data)
            if cfg.task == "classification":
                output = post_processor(feature)
            elif cfg.task == "semantic_segmentation":
                ori_spatial_res = data.shape[-2:]
                output = post_processor(feature, ori_spatial_res)
            test_loss += criterion(output, target).item()  # sum up batch loss
            if cfg.task == "classification":
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
            elif cfg.task == "semantic_segmentation":
                pred_map = output.max(dim=1)[1]
                batch_acc, _ = utils.compute_pixel_acc(
                    pred_map, target, fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                pixel_acc_list.append(float(batch_acc))
                for i in range(pred_map.shape[0]):
                    iou = utils.compute_iou(
                        np.array(pred_map[i].cpu()),
                        np.array(target[i].cpu(), dtype=np.int64),
                        cfg.num_classes,
                        fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                    iou_list.append(float(iou))
            else:
                raise NotImplementedError
    test_loss /= len(test_loader.dataset)
    if cfg.task == "classification":
        acc = 100. * correct / len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
            test_loss, correct, len(test_loader.dataset), acc))
        return acc
    elif cfg.task == "semantic_segmentation":
        m_iou = np.mean(iou_list)
        print('\nTest set: Average loss: {:.4f}, Mean Pixel Accuracy: {:.4f}, Mean IoU {:.4f}'.format(
            test_loss, np.mean(pixel_acc_list), m_iou))
        return m_iou
    else:
        raise NotImplementedError
def test_epoch(model, dataloader, device, epoch):
    model.eval()
    with torch.no_grad():
        correct, correct_cls, total = 0, 0, 0
        for X, y in dataloader:
            X, gt_cls, gt_bbox = X.to(device), y['cls'].to(device), y['bbox'].to(device)
            logits, bbox = model(X)
            correct += sum(
                (torch.argmax(logits, axis=1) == gt_cls).cpu().detach().numpy()
                & (compute_iou(bbox.cpu(), gt_bbox.cpu()) > iou_thr))
            correct_cls += sum(torch.argmax(logits, axis=1) == gt_cls)
            total += len(X)
        print(f'    val acc: {correct / total * 100:.2f}')
def eval(model, criterion, optimizer, loader, batch_size, epoch_it, rep):
    model.eval()
    loss_collect = []
    metric_collect = []
    if rep == 'occ':
        sigmoid = torch.nn.Sigmoid()
    with tqdm(total=int(len(loader)), ascii=True) as pbar:
        with torch.no_grad():
            for mbatch in loader:
                img_input, points_input, values = mbatch
                img_input = Variable(img_input).cuda()
                points_input = Variable(points_input).cuda()
                values = Variable(values).cuda()

                optimizer.zero_grad()
                logits = model(points_input, img_input)
                loss = criterion(logits, values)
                loss_collect.append(loss.data.cpu().item())

                if rep == 'occ':
                    logits = sigmoid(logits)
                    iou = utils.compute_iou(logits.detach().cpu().numpy(),
                                            values.detach().cpu().numpy())
                    metric_collect.append(iou)
                elif rep == 'sdf':
                    # acc_sign is sign IoU
                    # acc_thres is accuracy within a threshold
                    # More detailed explanation in utils.py
                    acc_sign, acc_thres, iou = utils.compute_acc(
                        logits.detach().cpu().numpy(),
                        values.detach().cpu().numpy())
                    metric_collect.append([acc_sign, acc_thres, iou])
                pbar.update(1)
    mean_loss = np.mean(np.array(loss_collect))
    if rep == 'occ':
        mean_metric = np.mean(np.concatenate(metric_collect))
        mean_metric = [mean_metric]
    else:
        mean_metric = np.mean(np.array(metric_collect), axis=0)
    return mean_loss, mean_metric
def get_abox_idx(bbox, i, j):
    '''
    Given a ground-truth bbox, get the index of the anchor box at the (i, j)
    grid cell that has the highest IoU with it.
    '''
    ious = []
    for anchor in anchor_boxes:
        anchor_coord = get_anchor(anchor, i, j)
        bbox = np.array(bbox)
        anchor_coord = np.array(list(anchor_coord))
        iou = compute_iou(bbox, anchor_coord)
        ious.append(iou)
    m = max(ious)
    return ious.index(m)
def _match_anchor_boxes(self, anchor_boxes, gt_boxes, match_iou=0.5, ignore_iou=0.4):
    """Matches ground truth boxes to anchor boxes based on IOU.

    1. Calculates the pairwise IOU for the M `anchor_boxes` and N `gt_boxes`
       to get a `(M, N)` shaped matrix.
    2. The ground truth box with the maximum IOU in each row is assigned to
       the anchor box provided the IOU is greater than `match_iou`.
    3. If the maximum IOU in a row is less than `ignore_iou`, the anchor
       box is assigned with the background class.
    4. The remaining anchor boxes that do not have any class assigned are
       ignored during training.

    Arguments:
        anchor_boxes: A float tensor with the shape `(total_anchors, 4)`
            representing all the anchor boxes for a given input image shape,
            where each anchor box is of the format `[x, y, width, height]`.
        gt_boxes: A float tensor with shape `(num_objects, 4)` representing
            the ground truth boxes, where each box is of the format
            `[x, y, width, height]`.
        match_iou: A float value representing the minimum IOU threshold for
            determining if a ground truth box can be assigned to an anchor box.
        ignore_iou: A float value representing the IOU threshold under which
            an anchor box is assigned to the background class.

    Returns:
        matched_gt_idx: Index of the matched object
        positive_mask: A mask for anchor boxes that have been assigned ground
            truth boxes.
        ignore_mask: A mask for anchor boxes that need to be ignored during
            training
    """
    iou_matrix = compute_iou(anchor_boxes, gt_boxes)
    max_iou = tf.reduce_max(iou_matrix, axis=1)
    matched_gt_idx = tf.argmax(iou_matrix, axis=1)
    positive_mask = tf.greater_equal(max_iou, match_iou)
    negative_mask = tf.less(max_iou, ignore_iou)
    ignore_mask = tf.logical_not(tf.logical_or(positive_mask, negative_mask))
    return (
        matched_gt_idx,
        tf.cast(positive_mask, dtype=tf.float32),
        tf.cast(ignore_mask, dtype=tf.float32),
    )
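# NOTE: the pairwise `compute_iou` assumed by `_match_anchor_boxes` returns an
# (M, N) matrix for boxes in [x, y, width, height] format. A TensorFlow sketch
# consistent with that contract (the function itself is not part of the excerpt):
import tensorflow as tf

def compute_iou(boxes1, boxes2):
    """Pairwise IoU for centre/size boxes; returns an (M, N) matrix (sketch)."""
    # Convert [x, y, w, h] to corner format.
    b1 = tf.concat([boxes1[:, :2] - boxes1[:, 2:] / 2.0,
                    boxes1[:, :2] + boxes1[:, 2:] / 2.0], axis=-1)
    b2 = tf.concat([boxes2[:, :2] - boxes2[:, 2:] / 2.0,
                    boxes2[:, :2] + boxes2[:, 2:] / 2.0], axis=-1)
    lu = tf.maximum(b1[:, None, :2], b2[None, :, :2])  # upper-left of intersection
    rd = tf.minimum(b1[:, None, 2:], b2[None, :, 2:])  # lower-right of intersection
    inter_wh = tf.maximum(0.0, rd - lu)
    inter = inter_wh[..., 0] * inter_wh[..., 1]
    area1 = boxes1[:, 2] * boxes1[:, 3]
    area2 = boxes2[:, 2] * boxes2[:, 3]
    union = tf.maximum(area1[:, None] + area2[None, :] - inter, 1e-8)
    return tf.clip_by_value(inter / union, 0.0, 1.0)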
def process_set(sequence_name_list, output_dir, img_dir, anno_dir,
                embedding_dir, gc_dir, seeds_dir):
    result_list = []
    result_log = []
    for s in sequence_name_list:
        if output_dir:
            sequence_output = os.path.join(output_dir, s)
            if not os.path.exists(sequence_output):
                os.mkdir(sequence_output)
        else:
            sequence_output = None
        frame = 0
        length = len(glob.glob('%s/%s/*.jpg' % (img_dir, s)))
        seg_list = []
        gt_list = []
        # The last frame is not processed, which is allowed by the DAVIS evaluation.
        while frame < length - 1:
            if frame % 10 == 0:
                print('%d/%d' % (frame, length))
            gt = cv2.imread('%s/%s/%05d.png' % (anno_dir, s, frame),
                            flags=cv2.IMREAD_GRAYSCALE)
            gt = (gt > 0).astype(np.uint8)
            gt_list.append(gt)
            seg = vos_frame(s, frame, img_dir, embedding_dir, gc_dir, seeds_dir,
                            frame == length - 2)
            if sequence_output:
                cv2.imwrite('%s/seg_%05d.png' % (sequence_output, frame),
                            (seg * 255).astype(np.uint8))
            seg_list.append(seg)
            frame += 1
        # Evaluation
        iou = utils.compute_iou(seg_list, gt_list)
        mean_iou = np.average(iou)
        result_log.append('%s\t%.5f\n' % (s, mean_iou))
        print(s, mean_iou)
        result_list.append(mean_iou)
    result_log.append('avg\t%.5f\n' % (np.sum(np.array(result_list)) / len(result_list)))
    print('avg\t', np.sum(np.array(result_list)) / len(result_list))
    return result_log
def create_response_label(self, response_map, s_x, anchor_id):
    scale_x = config.instance_size / s_x
    max_x, max_y = np.unravel_index(response_map.argmax(), response_map.shape)
    max_x = max_x - 8
    max_y = max_y - 8
    bbox = [
        max_y * config.total_stride, max_x * config.total_stride,
        self.target_sz[0] * scale_x, self.target_sz[1] * scale_x
    ]
    iou = compute_iou(self.anchors, bbox).flatten()
    pos_index = np.where(iou > config.pos_threshold)[0]
    neg_index = np.where(iou <= config.neg_threshold)[0]
    classification_label = np.tile(response_map.flatten(), config.anchor_num)
    # classification_label = np.ones_like(iou, dtype=np.float32) * -1
    classification_label[pos_index] = 1
    classification_label[neg_index] = 0
    return classification_label[anchor_id * 289:anchor_id * 289 + 289].reshape(17, 17)
def encode_label(gt_boxes):
    target_scores = np.zeros(shape=[wnum, hnum, 9, 2])  # 0: background, 1: foreground
    target_bboxes = np.zeros(shape=[wnum, hnum, 9, 4])  # t_x, t_y, t_w, t_h
    target_masks = np.zeros(shape=[wnum, hnum, 9])  # negative_samples: -1, positive_samples: 1
    for i in range(wnum):  # y: height
        for j in range(hnum):  # x: width
            for k in range(9):
                center_x = j * grid_width + grid_width * 0.5
                center_y = i * grid_height + grid_height * 0.5
                xmin = center_x - wandhG[k][0] * 0.5
                ymin = center_y - wandhG[k][1] * 0.5
                xmax = center_x + wandhG[k][0] * 0.5
                ymax = center_y + wandhG[k][1] * 0.5
                # print(xmin, ymin, xmax, ymax)
                # ignore cross-boundary anchors
                if (xmin > -5) & (ymin > -5) & (xmax < (image_width + 5)) & (ymax < (image_height + 5)):
                    anchor_boxes = np.array([xmin, ymin, xmax, ymax])
                    anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
                    # compute iou between this anchor and all ground-truth boxes in the image
                    ious = compute_iou(anchor_boxes, gt_boxes)
                    positive_masks = ious >= pos_thresh
                    negative_masks = ious <= neg_thresh
                    if np.any(positive_masks):
                        target_scores[i, j, k, 1] = 1.
                        target_masks[i, j, k] = 1  # labeled as a positive sample
                        # find out which ground-truth box matches this anchor
                        max_iou_idx = np.argmax(ious)
                        selected_gt_boxes = gt_boxes[max_iou_idx]
                        target_bboxes[i, j, k] = compute_regression(selected_gt_boxes, anchor_boxes[0])
                    if np.all(negative_masks):
                        target_scores[i, j, k, 0] = 1.
                        target_masks[i, j, k] = -1  # labeled as a negative sample
    return target_scores, target_bboxes, target_masks
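# NOTE: `compute_regression`, used by both `encode_label` variants to produce
# (t_x, t_y, t_w, t_h) targets, is not defined in these excerpts. A sketch of
# the standard Faster R-CNN parameterization it presumably implements, assuming
# corner-format [xmin, ymin, xmax, ymax] inputs:
import numpy as np

def compute_regression(gt_box, anchor_box):
    """Standard RPN regression targets of a GT box relative to an anchor (sketch)."""
    aw = anchor_box[2] - anchor_box[0]
    ah = anchor_box[3] - anchor_box[1]
    acx = anchor_box[0] + 0.5 * aw
    acy = anchor_box[1] + 0.5 * ah
    gw = gt_box[2] - gt_box[0]
    gh = gt_box[3] - gt_box[1]
    gcx = gt_box[0] + 0.5 * gw
    gcy = gt_box[1] + 0.5 * gh
    t_x = (gcx - acx) / aw
    t_y = (gcy - acy) / ah
    t_w = np.log(gw / aw)
    t_h = np.log(gh / ah)
    return np.array([t_x, t_y, t_w, t_h])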
def evaluate(self, train_step):
    self.sess.run(tf.local_variables_initializer())
    all_y = np.zeros((0, self.conf.height, self.conf.width, self.conf.depth))
    all_y_pred = np.zeros((0, self.conf.height, self.conf.width, self.conf.depth))
    for step in range(self.num_val_batch):
        start = step * self.conf.val_batch_size
        end = (step + 1) * self.conf.val_batch_size
        x_val, y_val = self.data_reader.next_batch(start, end, mode='valid')
        feed_dict = {self.x: x_val, self.y: y_val, self.keep_prob: 1}
        self.sess.run([self.mean_loss_op, self.mean_accuracy_op], feed_dict=feed_dict)
        y, y_pred = self.sess.run([self.y, self.y_pred], feed_dict=feed_dict)
        all_y = np.concatenate((all_y, y), axis=0)
        all_y_pred = np.concatenate((all_y_pred, y_pred), axis=0)
    IOU = compute_iou(all_y_pred, all_y, num_cls=self.conf.num_cls)
    mean_IOU = np.mean(IOU)
    summary_valid = self.sess.run(self.merged_summary, feed_dict=feed_dict)
    valid_loss, valid_acc = self.sess.run([self.mean_loss, self.mean_accuracy])
    self.save_summary(summary_valid, train_step + self.conf.reload_step)
    if valid_acc > self.best_validation_accuracy:
        self.best_validation_accuracy = valid_acc
        improved_str = '(improved)'
        self.save(train_step + self.conf.reload_step)
    else:
        improved_str = ''
    print('-' * 25 + 'Validation' + '-' * 25)
    print('After {0} training step: val_loss= {1:.4f}, val_acc={2:.01%}{3}'.format(
        train_step, valid_loss, valid_acc, improved_str))
    print('BackGround={0:.01%}, Neuron={1:.01%}, Vessel={2:.01%}, Average={3:.01%}'.format(
        IOU[0], IOU[1], IOU[2], mean_IOU))
    print('-' * 60)
def train_epoch(model, dataloader, criterion: dict, optimizer, scheduler, epoch, device):
    model.train()
    bar = tqdm(dataloader)
    bar.set_description(f'epoch {epoch:2}')
    correct, total = 0, 0
    for X, y in bar:
        X, gt_cls, gt_bbox = X.to(device), y['cls'].to(device), y['bbox'].to(device)
        logits, bbox = model(X)
        loss = criterion['cls'](logits, gt_cls) + 10 * criterion['box'](bbox, gt_bbox)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # detach before the IoU metric so it stays out of the autograd graph
        correct += sum(
            (torch.argmax(logits, axis=1) == gt_cls).cpu().detach().numpy()
            & (compute_iou(bbox.detach().cpu(), gt_bbox.cpu()) > iou_thr))
        total += len(X)
        bar.set_postfix_str(
            f'lr={scheduler.get_last_lr()[0]:.4f} acc={correct / total * 100:.2f} loss={loss.item():.2f}')
    scheduler.step()
def test(self, step_num):
    self.sess.run(tf.local_variables_initializer())
    self.reload(step_num)
    self.data_reader = DataLoader(self.conf)
    self.numTest = self.data_reader.count_num_samples(mode='test')
    self.num_test_batch = int(self.numTest / self.conf.val_batch_size)
    self.is_train = False
    self.sess.run(tf.local_variables_initializer())
    all_y = np.zeros((0, self.conf.height, self.conf.width, self.conf.depth))
    all_y_pred = np.zeros((0, self.conf.height, self.conf.width, self.conf.depth))
    for step in range(self.num_test_batch):
        start = step * self.conf.val_batch_size
        end = (step + 1) * self.conf.val_batch_size
        x_test, y_test = self.data_reader.next_batch(start, end, mode='test')
        feed_dict = {self.x: x_test, self.y: y_test, self.keep_prob: 1}
        self.sess.run([self.mean_loss_op, self.mean_accuracy_op], feed_dict=feed_dict)
        y, y_pred = self.sess.run([self.y, self.y_pred], feed_dict=feed_dict)
        all_y = np.concatenate((all_y, y), axis=0)
        all_y_pred = np.concatenate((all_y_pred, y_pred), axis=0)
    IOU = compute_iou(all_y_pred, all_y, num_cls=self.conf.num_cls)
    mean_IOU = np.mean(IOU)
    test_loss, test_acc = self.sess.run([self.mean_loss, self.mean_accuracy])
    print('-' * 18 + 'Test Completed' + '-' * 18)
    print('test_loss= {0:.4f}, test_acc={1:.01%}'.format(test_loss, test_acc))
    print('BackGround={0:.01%}, Neuron={1:.01%}, Vessel={2:.01%}, Average={3:.01%}'.format(
        IOU[0], IOU[1], IOU[2], mean_IOU))
    print('-' * 50)
    #                           num_workers=opt.num_workers,
    #                           val_ratio=0.1,
                                pin_memory=opt.pin_memory)
    if opt.n_gpu > 1:
        model = nn.DataParallel(model)
    if opt.is_cuda:
        model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate,
                           weight_decay=opt.weight_decay)
    criterion = nn.BCELoss().cuda()
    # start to run a training
    run_train(model, train_loader, val_loader, opt, criterion)
    # make prediction on validation set
    predictions, img_ids = run_test(model, val_loader, opt)
    # compute IOU between prediction and ground truth masks
    compute_iou(predictions, img_ids, val_loader)
    # SAVE model
    if opt.save_model:
        torch.save(model.state_dict(),
                   os.path.join(opt.checkpoint_dir, 'model-01.pt'))
else:
    # load testing data for making predictions
    test_loader = get_test_loader(opt.test_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)
    # load the model and run test
    model.load_state_dict(
        torch.load(os.path.join(opt.checkpoint_dir, 'model-01.pt')))
    if opt.n_gpu > 1:
def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config):
    """Generate targets for training Stage 2 classifier and mask heads.
    This is not used in normal training. It's useful for debugging or to train
    the Mask RCNN heads without using the RPN head.

    Inputs:
    rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes.
    gt_class_ids: [instance count] Integer class IDs
    gt_boxes: [instance count, (y1, x1, y2, x2)]
    gt_masks: [height, width, instance count] Ground truth masks. Can be full
              size or mini-masks.

    Returns:
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
    bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))].
            Class-specific bbox refinements.
    masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific
           masks cropped to bbox boundaries and resized to neural network
           output size.
    """
    assert rpn_rois.shape[0] > 0
    assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format(gt_class_ids.dtype)
    assert gt_boxes.dtype == np.int32, "Expected int but got {}".format(gt_boxes.dtype)
    assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format(gt_masks.dtype)

    # It's common to add GT Boxes to ROIs but we don't do that here because
    # according to XinLei Chen's paper, it doesn't help.

    # Trim empty padding in gt_boxes and gt_masks parts
    instance_ids = np.where(gt_class_ids > 0)[0]
    assert instance_ids.shape[0] > 0, "Image must contain instances."
    gt_class_ids = gt_class_ids[instance_ids]
    gt_boxes = gt_boxes[instance_ids]
    gt_masks = gt_masks[:, :, instance_ids]

    # Compute areas of ROIs and ground truth boxes.
    rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * (rpn_rois[:, 3] - rpn_rois[:, 1])
    gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])

    # Compute overlaps [rpn_rois, gt_boxes]
    overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0]))
    for i in range(overlaps.shape[1]):
        gt = gt_boxes[i]
        overlaps[:, i] = utils.compute_iou(gt, rpn_rois, gt_box_area[i], rpn_roi_area)

    # Assign ROIs to GT boxes
    rpn_roi_iou_argmax = np.argmax(overlaps, axis=1)
    rpn_roi_iou_max = overlaps[np.arange(overlaps.shape[0]), rpn_roi_iou_argmax]
    # GT box assigned to each ROI
    rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax]
    rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax]

    # Positive ROIs are those with >= 0.5 IoU with a GT box.
    fg_ids = np.where(rpn_roi_iou_max > 0.5)[0]

    # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining)
    # TODO: To hard example mine or not to hard example mine, that's the question
    # bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0]
    bg_ids = np.where(rpn_roi_iou_max < 0.5)[0]

    # Subsample ROIs. Aim for 33% foreground.
    # FG
    fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    if fg_ids.shape[0] > fg_roi_count:
        keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False)
    else:
        keep_fg_ids = fg_ids
    # BG
    remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0]
    if bg_ids.shape[0] > remaining:
        keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False)
    else:
        keep_bg_ids = bg_ids
    # Combine indices of ROIs to keep
    keep = np.concatenate([keep_fg_ids, keep_bg_ids])
    # Need more?
    remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0]
    if remaining > 0:
        # Looks like we don't have enough samples to maintain the desired
        # balance. Reduce requirements and fill in the rest. This is
        # likely different from the Mask RCNN paper.

        # There is a small chance we have neither fg nor bg samples.
        if keep.shape[0] == 0:
            # Pick bg regions with easier IoU threshold
            bg_ids = np.where(rpn_roi_iou_max < 0.5)[0]
            assert bg_ids.shape[0] >= remaining
            keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False)
            assert keep_bg_ids.shape[0] == remaining
            keep = np.concatenate([keep, keep_bg_ids])
        else:
            # Fill the rest with repeated bg rois.
            keep_extra_ids = np.random.choice(keep_bg_ids, remaining, replace=True)
            keep = np.concatenate([keep, keep_extra_ids])
    assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \
        "keep doesn't match ROI batch size {}, {}".format(
            keep.shape[0], config.TRAIN_ROIS_PER_IMAGE)

    # Reset the gt boxes assigned to BG ROIs.
    rpn_roi_gt_boxes[keep_bg_ids, :] = 0
    rpn_roi_gt_class_ids[keep_bg_ids] = 0

    # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement.
    rois = rpn_rois[keep]
    roi_gt_boxes = rpn_roi_gt_boxes[keep]
    roi_gt_class_ids = rpn_roi_gt_class_ids[keep]
    roi_gt_assignment = rpn_roi_iou_argmax[keep]

    # Class-aware bbox deltas. [y, x, log(h), log(w)]
    bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.NUM_CLASSES, 4), dtype=np.float32)
    pos_ids = np.where(roi_gt_class_ids > 0)[0]
    bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement(
        rois[pos_ids], roi_gt_boxes[pos_ids, :4])
    # Normalize bbox refinements
    bboxes /= config.BBOX_STD_DEV

    # Generate class-specific target masks.
    masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0],
                      config.MASK_SHAPE[1], config.NUM_CLASSES),
                     dtype=np.float32)
    for i in pos_ids:
        class_id = roi_gt_class_ids[i]
        assert class_id > 0, "class id must be greater than 0"
        gt_id = roi_gt_assignment[i]
        class_mask = gt_masks[:, :, gt_id]

        if config.USE_MINI_MASK:
            # Create a mask placeholder, the size of the image
            placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool)
            # GT box
            gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id]
            gt_w = gt_x2 - gt_x1
            gt_h = gt_y2 - gt_y1
            # Resize mini mask to size of GT box
            placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \
                np.round(scipy.misc.imresize(class_mask.astype(float),
                                             (gt_h, gt_w),
                                             interp='nearest') / 255.0).astype(bool)
            # Place the mini batch in the placeholder
            class_mask = placeholder

        # Pick part of the mask and resize it
        y1, x1, y2, x2 = rois[i].astype(np.int32)
        m = class_mask[y1:y2, x1:x2]
        mask = scipy.misc.imresize(m.astype(float), config.MASK_SHAPE,
                                   interp='nearest') / 255.0
        masks[i, :, :, class_id] = mask

    return rois, roi_gt_class_ids, bboxes, masks
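# NOTE: the `utils.compute_iou` called by both `build_detection_targets`
# variants takes one GT box against many ROIs, with both areas precomputed
# outside the loop. A sketch matching that signature for [y1, x1, y2, x2] boxes:
import numpy as np

def compute_iou(box, boxes, box_area, boxes_area):
    """IoU of one box against an array of boxes, with precomputed areas (sketch)."""
    y1 = np.maximum(box[0], boxes[:, 0])
    y2 = np.minimum(box[2], boxes[:, 2])
    x1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[3], boxes[:, 3])
    intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
    union = box_area + boxes_area - intersection
    return intersection / union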
def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks):
    """Generate targets for training Stage 2 classifier and mask heads.
    This is not used in normal training. It's useful for debugging or to train
    the Mask RCNN heads without using the RPN head.

    Inputs:
    rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes.
    gt_class_ids: [instance count] Integer class IDs
    gt_boxes: [instance count, (y1, x1, y2, x2)]
    gt_masks: [height, width, instance count] Ground truth masks. Can be full
              size or mini-masks.

    Returns:
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
    bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))].
            Class-specific bbox refinements.
    masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific
           masks cropped to bbox boundaries and resized to neural network
           output size.
    """
    assert rpn_rois.shape[0] > 0
    assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format(gt_class_ids.dtype)
    assert gt_boxes.dtype == np.int32, "Expected int but got {}".format(gt_boxes.dtype)
    assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format(gt_masks.dtype)

    instance_ids = np.where(gt_class_ids > 0)[0]
    assert instance_ids.shape[0] > 0, "Image must contain instances."
    gt_class_ids = gt_class_ids[instance_ids]
    gt_boxes = gt_boxes[instance_ids]
    gt_masks = gt_masks[:, :, instance_ids]

    rpn_rois_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * (rpn_rois[:, 3] - rpn_rois[:, 1])
    gt_boxes_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])

    overlaps = np.zeros([rpn_rois.shape[0], gt_boxes.shape[0]])
    for i in range(gt_boxes.shape[0]):
        box = gt_boxes[i]
        overlaps[:, i] = utils.compute_iou(box, rpn_rois, gt_boxes_area[i], rpn_rois_area)

    rpn_rois_iou_argmax = np.argmax(overlaps, axis=1)
    rpn_rois_iou_max = overlaps[np.arange(overlaps.shape[0]), rpn_rois_iou_argmax]
    rpn_roi_gt_boxes = gt_boxes[rpn_rois_iou_argmax]
    rpn_roi_gt_class_ids = gt_class_ids[rpn_rois_iou_argmax]

    fg_ids = np.where(rpn_rois_iou_max > 0.5)[0]
    bg_ids = np.where(rpn_rois_iou_max < 0.5)[0]

    fg_count = int(hyper_parameters.FLAGS.ROI_POSITIVE_RATIO *
                   hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE)
    if fg_ids.shape[0] > fg_count:
        keep_fg_ids = np.random.choice(fg_ids, fg_count, replace=False)
    else:
        keep_fg_ids = fg_ids

    remaining = hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0]
    if bg_ids.shape[0] > remaining:
        keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False)
    else:
        keep_bg_ids = bg_ids

    keep = np.concatenate([keep_fg_ids, keep_bg_ids])
    remaining = hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE - keep.shape[0]
    if remaining > 0:
        keep_extra_ids = np.random.choice(keep_bg_ids, remaining, replace=True)
        keep = np.concatenate([keep, keep_extra_ids])
    assert keep.shape[0] == hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE, \
        "keep doesn't match ROI batch size {}, {}".format(
            keep.shape[0], hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE)

    rpn_roi_gt_boxes[keep_bg_ids, :] = 0
    rpn_roi_gt_class_ids[keep_bg_ids] = 0

    rois = rpn_rois[keep]
    roi_gt_boxes = rpn_roi_gt_boxes[keep]
    roi_gt_class_ids = rpn_roi_gt_class_ids[keep]
    roi_gt_assignment = rpn_rois_iou_argmax[keep]

    bboxes = np.zeros([hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE,
                       hyper_parameters.FLAGS.NUM_CLASSES, 4],
                      dtype=np.float32)
    pos_ids = np.where(roi_gt_class_ids > 0)[0]
    bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement(
        rois[pos_ids], roi_gt_boxes[pos_ids, :4])
    bboxes /= hyper_parameters.FLAGS.BBOX_STD_DEV

    masks = np.zeros((hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE,
                      hyper_parameters.FLAGS.MASK_SHAPE[0],
                      hyper_parameters.FLAGS.MASK_SHAPE[1],
                      hyper_parameters.FLAGS.NUM_CLASSES),
                     dtype=np.float32)
    for i in pos_ids:
        class_id = roi_gt_class_ids[i]
        assert class_id > 0, "class id must be greater than 0"
        assert np.issubdtype(type(i), np.integer)
        gt_id = roi_gt_assignment[i]
        class_mask = gt_masks[:, :, gt_id]

        if hyper_parameters.FLAGS.USE_MINI_MASK:
            # Create a mask placeholder, the size of the image
            placeholder = np.zeros(hyper_parameters.FLAGS.IMAGE_SHAPE[:2], dtype=bool)
            # GT box
            gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id]
            gt_w = gt_x2 - gt_x1
            gt_h = gt_y2 - gt_y1
            # Resize mini mask to size of GT box
            placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \
                np.round(utils.resize(class_mask, (gt_h, gt_w))).astype(bool)
            # Place the mini batch in the placeholder
            class_mask = placeholder

        # Pick part of the mask and resize it
        y1, x1, y2, x2 = rois[i].astype(np.int32)
        m = class_mask[y1:y2, x1:x2]
        mask = utils.resize(m, hyper_parameters.FLAGS.MASK_SHAPE)
        masks[i, :, :, class_id] = mask

    return rois, roi_gt_class_ids, bboxes, masks
def get_anomalies_sequential(video_reader, reid_model_path, fbf_results_dict,
                             static_results_dict, ignore_matrix_gen=None,
                             reid_model_name="resnet50", start_frame=1,
                             frame_interval=20, abnormal_duration_thresh=60,
                             detect_thresh=5, undetect_thresh=8,
                             score_thresh=0.3, light_thresh=0.8,
                             anomaly_score_thresh=0.7, similarity_thresh=0.95,
                             suspicious_time_thresh=18, verbose=False,
                             anomaly_nms_thresh=0.8):
    """
    Performs the anomaly detection. Sequential version.

    video_reader: VideoReader object for raw video
    reid_model_path: path to re-ID model checkpoint
    fbf_results_dict: ResultsDict object for frame-by-frame/raw video detection results
    static_results_dict: ResultsDict object for static/background detection results
    ignore_matrix_gen: generator yielding ignore matrix, must have the same
        interval as frame_interval. Or single numpy array, or path to .npy file.
    reid_model_name: backbone used for reid model
    start_frame: video frame to start from
    frame_interval: interval between frames to do calculations on
    abnormal_duration_thresh: duration (in seconds) to consider an object abnormal
    detect_thresh: duration (in frames) to consider an object for tracking
    undetect_thresh: duration (in frames) to stop considering an object for tracking
    score_thresh: detection score threshold for bounding boxes
    light_thresh: brightness threshold (not sure what it does)
    anomaly_score_thresh: threshold to consider an object an anomaly
    similarity_thresh: threshold for object re-ID
    suspicious_time_thresh: duration (in seconds) for an object to be considered suspicious
    verbose: verbose printing
    anomaly_nms_thresh: IoU threshold for anomaly NMS.
    """

    def get_ignore_gen(ign_matrix):
        """Handles different inputs for the ignore matrix."""
        if isinstance(ign_matrix, types.GeneratorType):
            return ign_matrix
        # load/create matrix
        if ign_matrix is None:
            matrix = np.ones((h, w), dtype=bool)  # don't ignore anything
        elif type(ign_matrix) == str:  # filename
            matrix = np.load(ign_matrix).astype(bool)
        else:
            raise TypeError("Invalid ignore matrix type:", type(ign_matrix))
        return (matrix for _ in iter(int, 1))  # infinite generator

    # Get video data
    num_frames, framerate, image_shape = (video_reader.nframes,
                                          video_reader.framerate,
                                          video_reader.img_shape)

    # load model
    reid_model = ReidExtractor(reid_model_name, reid_model_path)

    # Set up information matrices
    h, w, _ = image_shape
    ignore_matrix_gen = get_ignore_gen(ignore_matrix_gen)
    detect_count_matrix = np.zeros((h, w))
    undetect_count_matrix = np.zeros((h, w))
    start_time_matrix = np.zeros((h, w))
    end_time_matrix = np.zeros((h, w))
    score_matrix = np.zeros((h, w))
    # State matrix, 0/1 distinguishes suspicious candidate states
    state_matrix = np.zeros((h, w), dtype=bool)

    if verbose:
        print(f"total frames: {num_frames}, framerate: {framerate}, height: {h}, width: {w}")
        print("-------------------------")

    ### Main loop
    start = False
    tmp_start = False
    all_results = []
    anomaly_now = {}
    for frame in range(start_frame, num_frames, frame_interval):
        try:
            ignore_matrix = next(ignore_matrix_gen)
            # if frame % (10*30) == 0:
            #     plt.imshow(ignore_matrix)
            #     plt.show()
        except StopIteration:
            pass  # keep same ignore matrix

        # Comment out if not using crop boxes, not needed
        # if fbf_results_dict.max_frame < static_results_dict.max_frame:
        #     fbf_results_dict.gen_next()

        # create tmp_score, tmp_detect
        static_results = static_results_dict[frame]
        if static_results is not None:
            boxes = static_results.loc[static_results["score"] > score_thresh,
                                       ["x1", "y1", "x2", "y2", "score"]].values
        else:
            boxes = []
        tmp_score, tmp_detect = add_boxes(boxes, ignore_matrix)

        ### plotting
        # img = video_reader.get_frame(frame)
        # cmap = plt.get_cmap("viridis")
        # for x1, y1, x2, y2, score in boxes:
        #     x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        #     col = tuple(int(c * 255) for c in cmap(score)[:3])
        #     cv.rectangle(img, (x1, y1), (x2, y2), col, thickness=2)
        #
        # if frame % 12 == 0:
        #     plt.imshow(img)
        #     plt.show()
        ###

        if verbose:
            print(f"frame: {frame}")
            if len(boxes) > 0:
                print("\tboxes:", len(boxes))

        # add running totals
        score_matrix += tmp_score
        detect_count_matrix += tmp_detect

        # Update detection matrices
        undetect_count_matrix += ~tmp_detect
        undetect_count_matrix[tmp_detect] = 0

        # Update time matrices
        start_time_matrix[detect_count_matrix == 1] = -600 if frame == 1 else frame  # why -600 for frame 1?
        end_time_matrix[detect_count_matrix > 0] = frame

        # Update state matrices
        state_matrix[detect_count_matrix > detect_thresh] = True

        # Detect anomaly
        time_delay = utils.mask(end_time_matrix - start_time_matrix, state_matrix)
        delay_max_idx = np.unravel_index(time_delay.argmax(), time_delay.shape)

        if not start and time_delay.max() / framerate > abnormal_duration_thresh:
            # and score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx] > 0.8:
            delay_max_idx = np.unravel_index(time_delay.argmax(), time_delay.shape)

            # backtrack the start time
            time_frame = int(start_time_matrix[delay_max_idx] / 5) * 5  # + 1 # why 5s and 1?
            G = np.where(detect_count_matrix < detect_count_matrix[delay_max_idx] - 2, 0, 1)  # What does G represent? why -2?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomaly_now and (time_frame / framerate - anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)
                similarity = reid_model.similarity(
                    video_reader.get_frame(f1_frame_num),
                    video_reader.get_frame(f2_frame_num),
                    anomaly_now["region"], region)
                if similarity > similarity_thresh:
                    time_frame = int(anomaly_now['start_time'] * framerate / 5) * 5  # + 1 # why 5s and 1?
                else:
                    anomaly_now['region'] = region
            else:
                anomaly_now['region'] = region

            # IoU stuff
            max_iou = 1
            count = 1
            start_time = time_frame
            tmp_len = 1
            raio = 1
            while (max_iou > 0.1 or tmp_len < 40 or raio > 0.6) and time_frame > 1:  # why 0.1, 40, 0.6?
                raio = count / tmp_len
                print("time frame:", time_frame)
                fbf_results = fbf_results_dict[time_frame]
                if fbf_results is not None:
                    bboxes = fbf_results[["x1", "y1", "x2", "y2", "score"]].values
                    max_iou = utils.compute_iou(anomaly_now['region'], bboxes)
                else:
                    max_iou = 0
                time_frame -= 5  # why 5?
                if max_iou > 0.3:  # why 0.3?
                    count += 1
                if max_iou > 0.5:  # why 0.5?
                    # they mention 0.5 IoU in the paper for NMS, might not be this
                    start_time = time_frame
                tmp_len += 1

            # backtrack start_time until brightness at that spot falls below a threshold
            for time_frame in range(start_time, 1, -5):
                tmp_im = video_reader.get_frame(time_frame)
                if utils.compute_brightness(tmp_im[region[1]:region[3], region[0]:region[2]]) <= light_thresh:
                    break
                start_time = time_frame

            anomaly_now['start_time'] = max(0, start_time / framerate)
            anomaly_now['end_time'] = max(0, end_time_matrix[delay_max_idx] / framerate)
            start = True

        elif not tmp_start and time_delay.max() > suspicious_time_thresh * framerate:
            time_frame = start_time_matrix[delay_max_idx]
            G = np.where(detect_count_matrix < detect_count_matrix[delay_max_idx] - 2, 0, 1)  # what does G represent?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomaly_now and (time_frame / framerate - anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)
                similarity = reid_model.similarity(
                    video_reader.get_frame(f1_frame_num),
                    video_reader.get_frame(f2_frame_num),
                    anomaly_now["region"], region)
                if similarity > similarity_thresh:
                    time_frame = int(anomaly_now['start_time'] * framerate / 5) * 5 + 1
                    region = anomaly_now['region']

            anomaly_now['region'] = region
            anomaly_now['start_time'] = max(0, time_frame / framerate)
            anomaly_now['end_time'] = max(0, end_time_matrix[delay_max_idx] / framerate)
            tmp_start = True

        if start and time_delay.max() / framerate > abnormal_duration_thresh:
            delay_max_idx = np.unravel_index(time_delay.argmax(), time_delay.shape)
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx]
                print("\t", anomaly_now, anomaly_score)
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score
                    all_results.append(anomaly_now)
                    anomaly_now = {}
                start = False
        elif tmp_start and time_delay.max() > suspicious_time_thresh * framerate:
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx]
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score
                tmp_start = False

        # undetect matrix changes state_matrix
        state_matrix[undetect_count_matrix > undetect_thresh] = False
        undetect_count_matrix[undetect_count_matrix > undetect_thresh] = 0

        # update matrices
        tmp_detect |= state_matrix
        detect_count_matrix = utils.mask(detect_count_matrix, tmp_detect)
        score_matrix = utils.mask(score_matrix, tmp_detect)

    # Add all anomalies to the results list
    print("---", start, time_delay.max(), score_matrix[delay_max_idx],
          detect_count_matrix[delay_max_idx])
    if start and time_delay.max() > abnormal_duration_thresh * framerate:
        anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx]
        if anomaly_score > anomaly_score_thresh:
            anomaly_now['end_time'] = end_time_matrix[delay_max_idx] / framerate
            anomaly_now['score'] = anomaly_score
            all_results.append(anomaly_now)
            anomaly_now = {}
            start = False

    # Apply Non-Maximum Suppression to the results
    if all_results:
        nms_out = utils.anomaly_nms(all_results, anomaly_nms_thresh)

        # final_result = {'start_time': 892, 'score': 0}  # why 892?
        # for nms_start_time, nms_end_time in nms_out[:, 5:7]:
        #     if nms_start_time < final_result["start_time"]:
        #         final_result["start_time"] = max(0, int(nms_start_time - 1))
        #         final_result["score"] = 1
        #         final_result["end_time"] = nms_end_time

        final_results = pd.DataFrame(
            nms_out,
            columns=["x1", "y1", "x2", "y2", "score", "start_time", "end_time"])
        return final_results
    return None
def get_overall_IOU(boundingBoxes, sort_inds, g_fnames, q_fnames):
    """
    boundingBoxes: BoundingBoxes object holding the list of all the bounding
        boxes in the images
    g_fnames: gallery filenames, without the extension (no .png), e.g. '24889'
    """
    n_query = len(q_fnames)
    avgIouArray = np.zeros((n_query, 5))
    weightedIouArray = np.zeros((n_query, 5))
    allClasses = boundingBoxes.getClasses()
    classIou = dict([(key, []) for key in allClasses])
    for i in range(sort_inds.shape[0]):  # Iterate over all the query images
        ts = time.time()
        qImageName = q_fnames[i]
        qBBoxes = boundingBoxes.getBoundingBoxesByImageName(qImageName)
        for j in range(5):  # Iterate over top-5 retrieved images
            rImageName = g_fnames[sort_inds[i][j]]
            rBBoxes = boundingBoxes.getBoundingBoxesByImageName(rImageName)
            iouTemp = []
            weights = []
            # Iterate over each element (bounding box)
            for bb in qBBoxes:  # query bounding boxes
                bb_cordinates = bb.getBoundingBox()
                bb_class = bb.classId
                # get the bounding boxes in the retrieved image that have the same class
                rbbs = [d for d in rBBoxes if d.classId == bb_class]
                iouMax = 0
                for rbb in rbbs:
                    assert rbb.classId == bb_class
                    rbb_cordinates = rbb.getBoundingBox()
                    iou = compute_iou(bb_cordinates, rbb_cordinates)
                    if iou > iouMax:
                        iouMax = iou
                    if iou < 0:
                        print('Warning!!: Negative iou found ', 'ImageName:',
                              rbb.getImageName(), ' bounding box', rbb.getBoundingBox())
                assert iouMax >= 0
                # Store iou with best matched component label
                iouTemp.append(iouMax)
                weights.append(bb_cordinates[2] * bb_cordinates[3])
                # Update iou into corresponding classIou
                classIou[bb_class].append(iouMax)
            # Average IoU between a query and a retrieved image
            avgIouArray[i][j] = np.mean(iouTemp)
            weightTotal = np.sum(weights)
            weights = np.divide(weights, weightTotal)
            weightedIou = sum(iouTemp * weights)
            weightedIouArray[i][j] = weightedIou
        # print('Computing IoU metric: {}/{}'.format(i, 50))
        # print('Time for query{} = {}'.format(i, time.time() - ts))
        print('Computing IoU: {}/{} in time {}'.format(i, n_query, time.time() - ts))
        ts = time.time()
    meanAvgIou = np.mean(avgIouArray, axis=1)
    overallMeanIou = np.mean(meanAvgIou)
    meanWeightedIou = np.mean(weightedIouArray, axis=1)
    overallMeanWeightedIou = np.mean(meanWeightedIou)
    print('Completed computing IoU metric: {}/{}'.format(i + 1, n_query))
    return overallMeanIou, overallMeanWeightedIou, classIou
netD = netD()
criterion = torch.nn.BCELoss()
# netD.apply(weights_init)
if Opt.ngpu > 1:
    netG = nn.DataParallel(netG)
    netD = nn.DataParallel(netD)
if Opt.is_cuda:
    netG = netG.cuda()
    netD = netD.cuda()
    criterion = criterion.cuda()

# Optimizers
optimizerG = torch.optim.Adam(netG.parameters(), lr=Opt.lr, betas=Opt.betas,
                              weight_decay=Opt.weight_decay)
optimizerD = torch.optim.Adam(netD.parameters(), lr=Opt.lr, betas=Opt.betas,
                              weight_decay=Opt.weight_decay)

if Opt.is_train:
    # predictions, img_ids = test(netG, val_loader)
    # compute_iou(predictions, img_ids, 'UNet_IOU')
    train(train_loader, netD, netG, criterion, optimizerG, optimizerD)
    # predictions, img_ids = test(netG, val_loader)
    # compute_iou(predictions, img_ids, 'GAN_IOU')
else:
    predictions, img_ids = test(netG, val_loader)
    compute_iou(predictions, img_ids, 'UNet_IOU')
                pred_score.append(score[i, j, k, 1])

pred_boxes = np.array(pred_boxes)
pred_score = np.array(pred_score)

# Non-maximum suppression over the predicted boxes
# selected_boxes = pred_boxes
selected_boxes = []
while len(pred_boxes) > 0:
    max_idx = np.argmax(pred_score)
    selected_box = pred_boxes[max_idx]
    selected_boxes.append(selected_box)
    pred_boxes = np.concatenate([pred_boxes[:max_idx], pred_boxes[max_idx + 1:]])
    pred_score = np.concatenate([pred_score[:max_idx], pred_score[max_idx + 1:]])
    ious = compute_iou(selected_box, pred_boxes)
    iou_mask = ious <= 0.1
    pred_boxes = pred_boxes[iou_mask]
    pred_score = pred_score[iou_mask]

selected_boxes = np.array(selected_boxes)
plot_boxes_on_image(raw_image, selected_boxes)
Image.fromarray(np.uint8(raw_image)).show()

grid_size = [45, 60]
grid_x = tf.range(grid_size[0], dtype=tf.int32)
grid_y = tf.range(grid_size[1], dtype=tf.int32)
a, b = tf.meshgrid(grid_x, grid_y)
x_offset = tf.reshape(a, (-1, 1))
y_offset = tf.reshape(b, (-1, 1))
# Two assumed ground-truth boxes in the image
bbox = np.asarray([[20, 30, 400, 500], [300, 400, 500, 600]],
                  dtype=np.float32)  # [y1, x1, y2, x2] format
# Labels for the two ground-truth boxes; 0 represents background
labels = np.asarray([6, 8], dtype=np.int8)

img_tensor = torch.zeros((1, 3, 800, 800)).float()
img_var = torch.autograd.Variable(img_tensor)

# ---------------------step_1: get the target anchor confidences (anchor_conf)
# and the translation/scaling coefficients (anchor_locations)
# Initialize all anchors and find the valid anchors and their indices
# anchors: (22500, 4)  valid_anchor_boxes: (8940, 4)  valid_anchor_index: 8940
anchors, valid_anchor_boxes, valid_anchor_index = utils.init_anchor()
# Compute the IoU between each valid anchor and every ground-truth box
# ious: (8940, 2), IoU of each valid anchor box with the ground-truth boxes
ious = utils.compute_iou(valid_anchor_boxes, bbox)
valid_anchor_len = len(valid_anchor_boxes)

# Select a fixed ratio of positive and negative samples among the valid anchors
label, argmax_ious = utils.get_pos_neg_sample(ious, valid_anchor_len,
                                              pos_iou_threshold=0.7,
                                              neg_iou_threshold=0.3,
                                              pos_ratio=0.5,
                                              n_sample=256)
# print(np.sum(label == 1))  # 18 positives
# print(np.sum(label == 0))  # 256 - 18 = 238 negatives

# Now assign each anchor box the location of the ground-truth object with the
# highest IoU. Note that locations are assigned to all valid anchor boxes
# regardless of their label; the unwanted ones can be filtered out later with a
# simple mask when computing the loss.
# The ground-truth box corresponding to each valid anchor, shape (8940, 4)
max_iou_bbox = bbox[argmax_ious]
center_x = j * grid_width + grid_width * 0.5
center_y = i * grid_height + grid_height * 0.5
xmin = center_x - wandhG[k][0] * 0.5
ymin = center_y - wandhG[k][1] * 0.5
xmax = center_x + wandhG[k][0] * 0.5
ymax = center_y + wandhG[k][1] * 0.5
# print(xmin, ymin, xmax, ymax)
# ignore cross-boundary anchors
if (xmin > -5) & (ymin > -5) & (xmax < (image_width + 5)) & (ymax < (image_height + 5)):
    anchor_boxes = np.array([xmin, ymin, xmax, ymax])
    anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
    # compute iou between this anchor and all ground-truth boxes in the image
    ious = compute_iou(anchor_boxes, gt_boxes)
    positive_masks = ious > pos_thresh
    negative_masks = ious < neg_thresh
    if np.any(positive_masks):
        plot_boxes_on_image(encoded_image, anchor_boxes, thickness=1)
        print("=> Encoding positive sample: %d, %d, %d" % (i, j, k))
        cv2.circle(encoded_image,
                   center=(int(0.5 * (xmin + xmax)), int(0.5 * (ymin + ymax))),
                   radius=1, color=[255, 0, 0], thickness=4)
def test(cfg, model, post_processor, criterion, device, test_loader, visfreq):
    model.eval()
    post_processor.eval()
    test_loss = 0
    correct = 0
    pixel_acc_list = []
    iou_list = []
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            feature = model(data)
            output = post_processor(feature)
            test_loss += criterion(output, target).item()  # sum up batch loss
            if cfg.task == "classification":
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                # TODO: save classified images with raw image as content and
                # human readable label as filenames
            elif cfg.task == "semantic_segmentation":
                pred_map = output.max(dim=1)[1]
                batch_acc, _ = utils.compute_pixel_acc(
                    pred_map, target, fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                pixel_acc_list.append(float(batch_acc))
                for i in range(pred_map.shape[0]):
                    pred_np = np.array(pred_map[i].cpu())
                    target_np = np.array(target[i].cpu(), dtype=np.int64)
                    iou = utils.compute_iou(
                        pred_np, target_np, cfg.num_classes,
                        fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                    iou_list.append(float(iou))
                    if (i + 1) % visfreq == 0:
                        cv2.imwrite("{}_{}_pred.png".format(idx, i), pred_np)
                        cv2.imwrite("{}_{}_label.png".format(idx, i), target_np)
                        # Visualize RGB image as well
                        ori_rgb_np = np.array(data[i].permute((1, 2, 0)).cpu())
                        if 'normalize' in cfg.DATASET.TRANSFORM.TEST.transforms:
                            rgb_mean = cfg.DATASET.TRANSFORM.TEST.TRANSFORMS_DETAILS.NORMALIZE.mean
                            rgb_sd = cfg.DATASET.TRANSFORM.TEST.TRANSFORMS_DETAILS.NORMALIZE.sd
                            ori_rgb_np = (ori_rgb_np * rgb_sd) + rgb_mean
                        assert ori_rgb_np.max() <= 1.1, "Max is {}".format(ori_rgb_np.max())
                        ori_rgb_np[ori_rgb_np >= 1] = 1
                        ori_rgb_np = (ori_rgb_np * 255).astype(np.uint8)
                        # Convert to OpenCV BGR
                        ori_rgb_np = cv2.cvtColor(ori_rgb_np, cv2.COLOR_RGB2BGR)
                        cv2.imwrite("{}_{}_ori.jpg".format(idx, i), ori_rgb_np)
            else:
                raise NotImplementedError
    test_loss /= len(test_loader.dataset)
    if cfg.task == "classification":
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    elif cfg.task == "semantic_segmentation":
        print('\nTest set: Average loss: {:.4f}, Mean Pixel Accuracy: {:.4f}, Mean IoU {:.4f}\n'.format(
            test_loss, np.mean(pixel_acc_list), np.mean(iou_list)))
    else:
        raise NotImplementedError
def get_overall_IOU_ndcg(boundingBoxes, sort_inds, g_fnames, q_fnames):
    allClasses = boundingBoxes.getClasses()
    classIou = dict([(key, []) for key in allClasses])
    aNdcg = np.empty((1, 0), float)
    wNdcg = np.empty((1, 0), float)
    for i in range(sort_inds.shape[0]):  # Iterate over all the query images
        qImageName = q_fnames[i]
        qBBoxes = boundingBoxes.getBoundingBoxesByImageName(qImageName)
        iouList = []
        weightedIouList = []
        time_s = time.time()
        # for j in range(len(g_fnames)):  # Iterate over all the gallery images instead of top-5
        for j in range(5):
            rImageName = g_fnames[sort_inds[i][j]]
            rBBoxes = boundingBoxes.getBoundingBoxesByImageName(rImageName)
            iouTemp = []
            weights = []
            # Iterate over each element (bounding box)
            for bb in qBBoxes:  # query bounding boxes
                bb_cordinates = bb.getBoundingBox()
                bb_class = bb.classId
                # get the bounding boxes in the retrieved image that have the same class
                rbbs = [d for d in rBBoxes if d.classId == bb_class]
                iouMax = 0
                for rbb in rbbs:
                    assert rbb.classId == bb_class
                    rbb_cordinates = rbb.getBoundingBox()
                    iou = compute_iou(bb_cordinates, rbb_cordinates)
                    if iou > iouMax:
                        iouMax = iou
                    if iou < 0:
                        print('Warning!!: Negative iou found ', 'ImageName:',
                              rbb.getImageName(), ' bounding box', rbb.getBoundingBox())
                assert iouMax >= 0
                # Store iou with best matched component label
                iouTemp.append(iouMax)
                weights.append(bb_cordinates[2] * bb_cordinates[3])
                # Update iou into corresponding classIou
                classIou[bb_class].append(iouMax)
            # Average IoU between a query and a retrieved image
            current_iou = np.mean(iouTemp)
            weightTotal = np.sum(weights)
            weights = np.divide(weights, weightTotal)
            current_weightedIou = sum(iouTemp * weights)
            weightedIouList.append(current_weightedIou)
            iouList.append(current_iou)
        aGain = ndcg_at_k(iouList, 5)
        wGain = ndcg_at_k(weightedIouList, 5)
        aNdcg = np.append(aNdcg, aGain)
        wNdcg = np.append(wNdcg, wGain)
        time_e = (time.time() - time_s) / 3600
        print('Elapsed time for one query: {:.3f}'.format(time_e))
    avg_aNdcg = np.mean(aNdcg)
    avg_wNdcg = np.mean(wNdcg)
    return avg_aNdcg, avg_wNdcg