def detect_instance(score_map, mask, class_id, max_fragment_size=0):
    """Convert pixel-wise instance masks into detection form.

    The three inputs are iterated in lockstep. Each mask with at least one
    set pixel is split into 4-connected components (``connectivity=1``), and
    every component becomes one detection carrying the class of the mask it
    came from.

    Args:
        score_map: iterable of per-instance score arrays, aligned with ``mask``.
        mask: iterable of per-instance masks, aligned with ``score_map``.
        class_id: iterable of class ids, one per entry of ``mask``.
        max_fragment_size: components whose pixel sum is below this value are
            still reported, but with a score of 0.

    Returns:
        dict with keys ``'score'``, ``'mask'`` and ``'class'``; each value is
        an array stacked along axis 0 with one entry per detected component.
        When nothing is detected, the arrays are empty along axis 0 instead
        of ``np.stack`` raising ``ValueError`` on an empty list.
    """
    pred_score = []
    pred_label = []
    pred_mask = []

    for ag_score, ag_mask, ag_class in zip(score_map, mask, class_id):
        if np.sum(ag_mask) < 1:
            # Nothing was assigned to this instance slot.
            continue

        # Connected components analysis; label 0 is the background, so the
        # first one-hot channel is dropped.
        components = measure.label(ag_mask, connectivity=1, background=0)
        segments = pyutils.to_one_hot(components)[1:]

        for seg_mask in segments:
            if np.sum(seg_mask) < max_fragment_size:
                # Tiny fragment: keep the detection but zero its score.
                pred_score.append(0)
            else:
                pred_score.append(np.max(ag_score * seg_mask))
            pred_label.append(ag_class)
            pred_mask.append(seg_mask)

    if not pred_mask:
        # np.stack cannot handle an empty list; return empty arrays whose
        # trailing dimensions and dtypes follow the inputs.
        mask_arr = np.asarray(mask)
        return {
            'score': np.zeros((0,), dtype=np.asarray(score_map).dtype),
            'mask': np.zeros((0,) + mask_arr.shape[1:], dtype=mask_arr.dtype),
            'class': np.zeros((0,), dtype=np.asarray(class_id).dtype),
        }

    return {
        'score': np.stack(pred_score, 0),
        'mask': np.stack(pred_mask, 0),
        'class': np.stack(pred_label, 0),
    }
def cluster_centroids(centroids, displacement, thres=2.5):
    """Group pixels into clusters according to where their centroids land.

    Pixels whose displacement magnitude is below ``thres`` are labelled into
    4-connected regions. Every pixel then takes the region label found at its
    centroid position, and the resulting label map is returned one-hot encoded.

    Args:
        centroids: pair of integer index arrays; ``centroids[0] * width +
            centroids[1]`` is used as a flat index into the label map
            (presumably row and column coordinates -- confirm with caller).
        displacement: indexable whose entries 0 and 1 are 2-D arrays of equal
            shape holding the two displacement components.
        thres: threshold for grouping centroid (see supp).

    Returns:
        Whatever ``pyutils.to_one_hot`` produces for the compressed cluster map.
    """
    d0, d1 = displacement[0], displacement[1]
    magnitude = np.sqrt(d1 ** 2 + d0 ** 2)
    n_rows, n_cols = magnitude.shape

    # Connected regions of weak displacement; 0 marks everything else.
    region_labels = measure.label(magnitude < thres, connectivity=1, background=0)

    # Look up the region under each pixel's centroid through a flat index.
    flat_positions = centroids[0] * n_cols + centroids[1]
    label_at_centroid = region_labels.reshape(-1)[flat_positions]

    # Shift labels by one before range compression, then one-hot encode.
    shifted = label_at_centroid.reshape(n_rows, n_cols) + 1
    compact = imutils.compress_range(shifted)
    return pyutils.to_one_hot(compact)
def _work_gpu(process_id, model, dataset, args):
    """Produce and save instance detections for one worker's share of the data.

    Args:
        process_id: worker index; selects ``dataset[process_id]`` and is used
            as the CUDA device index.
        model: network moved to the GPU and called on ``pack['img'][0]``; it
            must return an ``(edge, dp)`` pair.
        dataset: indexable collection holding one data subset per worker.
        args: namespace providing ``num_workers``, ``ins_seg_out_dir``,
            ``cam_out_dir``, ``beta``, ``exp_times`` and ``ins_seg_bg_thres``.

    Side effects:
        Writes ``<ins_seg_out_dir>/<img_name>.npy`` for every image that does
        not already have one, and prints progress markers on the last worker.

    NOTE(review): the source had lost its indentation. This layout assumes
    that images with an existing output file are skipped entirely and that
    the progress print runs for every image -- confirm against history.
    """
    n_gpus = torch.cuda.device_count()
    databin = dataset[process_id]
    data_loader = DataLoader(databin, shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    # Progress is reported every quarter of the bin. Clamp to 1 so that bins
    # with fewer than four items do not cause a modulo/division by zero.
    report_every = max(len(databin) // 4, 1)

    with torch.no_grad(), cuda.device(process_id):
        model.cuda()

        for step, pack in tqdm(enumerate(data_loader), total=len(databin)):
            img_name = pack['name'][0]
            path = os.path.join(args.ins_seg_out_dir, img_name + '.npy')

            if not os.path.exists(path):
                os.makedirs(os.path.dirname(path), exist_ok=True)
                size = np.asarray(pack['size'])

                edge, dp = model(pack['img'][0].cuda(non_blocking=True))
                dp = dp.cpu().numpy()

                # NOTE: allow_pickle=True unpickles the file contents; only
                # point cam_out_dir at files produced by a trusted pipeline.
                cam_dict = np.load(args.cam_out_dir + '/' + img_name + '.npy',
                                   allow_pickle=True).item()
                cams = cam_dict['cam'].cuda()
                keys = cam_dict['keys']

                centroids = find_centroids_with_refinement(dp)
                instance_map = cluster_centroids(centroids, dp)
                instance_cam = separte_score_by_mask(cams, instance_map)

                rw = indexing.propagate_to_edge(instance_cam, edge,
                                                beta=args.beta,
                                                exp_times=args.exp_times,
                                                radius=5)

                # Upsample by 4, take channel 0 and crop to the image size.
                rw_up = F.interpolate(
                    rw, scale_factor=4, mode='bilinear',
                    align_corners=False)[:, 0, :size[0], :size[1]]
                rw_up = rw_up / torch.max(rw_up)

                # Prepend one constant plane at index 0 so that argmax yields
                # 0 wherever no score exceeds the background threshold.
                rw_up_bg = F.pad(rw_up, (0, 0, 0, 0, 1, 0),
                                 value=args.ins_seg_bg_thres)

                num_classes = len(keys)
                num_instances = instance_map.shape[0]

                instance_shape = torch.argmax(rw_up_bg, 0).cpu().numpy()
                # Drop the first one-hot channel (the background plane).
                instance_shape = pyutils.to_one_hot(
                    instance_shape,
                    maximum_val=num_instances * num_classes + 1)[1:]
                instance_class_id = np.repeat(keys, num_instances)

                # Fragments under 1% of the image area get a score of 0.
                detected = detect_instance(
                    rw_up.cpu().numpy(), instance_shape, instance_class_id,
                    max_fragment_size=size[0] * size[1] * 0.01)

                np.save(path, detected)

            if process_id == n_gpus - 1 and step % report_every == 0:
                print("%d " % ((5 * step + 1) // report_every), end='')