def post_process(self, dets, meta, output_size):
    dets = dets.data.cpu().numpy()
    dets = dets.reshape(-1, dets.shape[2])
    c = meta['c']
    s = meta['s']
    h, w = output_size

    top_preds = {}
    dets[:, :2] = transform_preds(dets[:, 0:2], c, s, (w, h))
    dets[:, 2:4] = transform_preds(dets[:, 2:4], c, s, (w, h))
    classes = dets[:, -1]
    scores = dets[:, 4]
    # if len(scores) > self._max_per_image:
    #     kth = len(scores) - self._max_per_image
    #     thresh = np.partition(scores, kth)[kth]
    for j in range(self._num_classes):
        inds = np.logical_and(classes == j, scores >= self._threshold)
        top_preds[j + 1] = np.concatenate([
            dets[inds, :4].astype(np.float32),
            scores[inds].reshape(-1, 1).astype(np.float32)
        ], axis=1)
    return top_preds
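# --- Reference sketch (added) ----------------------------------------------
# A minimal sketch of what `transform_preds` typically does in CenterNet-style
# codebases: map point coordinates from the network's output feature map back
# to the original image frame via the inverse of the crop/resize affine
# transform. This is an assumption about the shared helper used throughout
# this file, not necessarily its exact implementation here; in particular,
# `get_affine_transform(..., inv=1)` returning the inverse 2x3 matrix is assumed.
import numpy as np

def transform_preds_sketch(coords, center, scale, output_size):
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)  # assumed signature
    target_coords = np.zeros(coords.shape)
    for p in range(coords.shape[0]):
        pt = np.array([coords[p, 0], coords[p, 1], 1.], dtype=np.float32)
        target_coords[p, 0:2] = np.dot(trans, pt)  # apply inverse affine map
    return target_coords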
def val_map(epoch):
    print('\n Val@Epoch: %d' % epoch)
    model.eval()
    torch.cuda.empty_cache()
    max_per_image = 100

    results = {}
    with torch.no_grad():
        for inputs in val_loader:
            img_id, inputs = inputs[0]

            detections = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)

                output = model(inputs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              inputs[scale]['center'],
                                              inputs[scale]['scale'],
                                              (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               inputs[scale]['center'],
                                               inputs[scale]['scale'],
                                               (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                clses = dets[:, -1]
                for j in range(val_dataset.num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {j: np.concatenate([d[j] for d in detections], axis=0)
                               for j in range(1, val_dataset.num_classes + 1)}
            scores = np.hstack([bbox_and_scores[j][:, 4]
                                for j in range(1, val_dataset.num_classes + 1)])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, val_dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores

    eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    print(eval_results)
    summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
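# --- Reference sketch (added) ----------------------------------------------
# val_map above caps the detections at max_per_image by thresholding at the
# k-th largest score using np.partition (an O(n) selection rather than a full
# sort). A self-contained sketch of just that step; `keep_top_k` and
# `score_col` are illustrative names, not part of the original code.
import numpy as np

def keep_top_k(preds_by_class, max_per_image, score_col=4):
    scores = np.hstack([v[:, score_col] for v in preds_by_class.values()])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]  # k-th largest score overall
        for j in preds_by_class:
            keep = preds_by_class[j][:, score_col] >= thresh
            preds_by_class[j] = preds_by_class[j][keep]
    return preds_by_class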
def post_process(self, dets, meta, scale=1):
    out_width, out_height = meta['out_width'], meta['out_height']
    dets = dets.detach().cpu().numpy().reshape(2, -1, 14)
    # Undo the horizontal flip of the second (flip-augmented) batch entry.
    dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
    dets = dets.reshape(1, -1, 14)
    dets[0, :, 0:2] = transform_preds(dets[0, :, 0:2], meta['c'], meta['s'],
                                      (out_width, out_height))
    dets[0, :, 2:4] = transform_preds(dets[0, :, 2:4], meta['c'], meta['s'],
                                      (out_width, out_height))
    dets[:, :, 0:4] /= scale
    return dets[0]
def multi_pose_post_process(self, dets, c, s, h, w):
    # dets: batch x max_dets x 40
    # return list of 39 in image coord
    ret = []
    for i in range(dets.shape[0]):
        bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h))
        pts = transform_preds(dets[i, :, 5:15].reshape(-1, 2), c[i], s[i], (w, h))
        top_preds = np.concatenate(
            [bbox.reshape(-1, 4), dets[i, :, 4:5],
             pts.reshape(-1, 10), dets[i, :, 15:20]],
            axis=1).astype(np.float32).tolist()
        # single class, keyed by class id 1
        ret.append({np.ones(1, dtype=np.int32)[0]: top_preds})
    return ret
def demo_image(image, image_name, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    path = "D:\\CV-Project\\pytorch-pose-hg-3d\\images\\last_save\\"
    _, image_name = os.path.split(image_name)
    image_name = image_name[:-4]  # strip the file extension

    debugger = Debugger()
    debugger.add_img(image, image_name)
    debugger.add_point_2d(pred, (255, 0, 0), image_name)
    debugger.add_point_3d(pred_3d, 'b')
    debugger.show_all_imgs(pause=False)
    debugger.show_3d(image_name, path)
    debugger.save_img(image_name, path)
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]  # 'hm': (1, 16, 64, 64), 'depth': (1, 16, 64, 64)
    preds, amb_idx = get_preds(out['hm'].detach().cpu().numpy())
    pred = preds[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d, ignore_idx = get_preds_3d(out['hm'].detach().cpu().numpy(),
                                       out['depth'].detach().cpu().numpy(),
                                       amb_idx)
    pred_3d = pred_3d[0]
    ignore_idx = ignore_idx[0]

    debugger = Debugger()
    debugger.add_img(image)  # copy the image into the debugger
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b', ignore_idx=ignore_idx)
    debugger.show_all_imgs(pause=False)
    debugger.show_3d()
    print("Done")
def demo_image(image, model, opt, timestep):
    inps = []
    s = None
    c = None
    hidden = None
    for t in range(timestep):
        s = max(image[t].shape[0], image[t].shape[1]) * 1.0
        c = np.array([image[t].shape[1] / 2., image[t].shape[0] / 2.],
                     dtype=np.float32)
        trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
        inp = cv2.warpAffine(image[t], trans_input, (opt.input_w, opt.input_h),
                             flags=cv2.INTER_LINEAR)
        inp = (inp / 255. - mean) / std
        inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
        inp = torch.from_numpy(inp).to(opt.device)
        inps.append(inp)

    if opt.task == "conv3d":
        outs = model(inps)
    else:
        outs, hidden = model(inps, hidden)
    out = outs[-1]
    preds, amb_idx = get_preds(out[-1]['hm'].detach().cpu().numpy())
    pred = preds[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d, ignore_idx = get_preds_3d(out[-1]['hm'].detach().cpu().numpy(),
                                       out[-1]['depth'].detach().cpu().numpy(),
                                       amb_idx)
    pred_3d = pred_3d[0]
    ignore_idx = ignore_idx[0]
    return image[-1], pred, pred_3d, ignore_idx
def demo_image(image, model, opt, save_path=None):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]

    debugger = Debugger()
    debugger.add_img(image)
    debugger.add_point_2d(pred, (255, 0, 0))
    debugger.add_point_3d(pred_3d, 'b')
    # import pdb; pdb.set_trace()
    debugger.show_all_imgs(pause=True)
    debugger.show_3d()
    if save_path:
        debugger.save_3d(save_path)
def convert_eval_format(self, pred, conf, meta):
    ret = np.zeros((pred.shape[0], pred.shape[1], 2))
    for i in range(pred.shape[0]):
        ret[i] = transform_preds(pred[i],
                                 meta['center'][i].numpy(),
                                 meta['scale'][i].numpy(),
                                 [self.opt.output_h, self.opt.output_w])
    return ret
def demo_image(image, model, opt):
    # image name was added as an input so that the individual output files
    # can be ascribed to the respective image
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    # this step readjusts the 2D skeleton to the input image with center and scale
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    # uses heatmap and depth map to create the 3D pose
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    # pred_3d = transform_preds(pred_3d, c, s, (opt.output_w, opt.output_h, 10))
    # this step would readjust the 3D skeleton to the input image with center and scale
    # debugger = Debugger()
    # debugger.add_img(image)                   # adds the image
    # debugger.add_point_2d(pred, (255, 0, 0))  # adds the 2D joint graph to the image
    # debugger.add_point_3d(pred_3d, 'b')       # plots the 3D joint locations
    # debugger.show_all_imgs(pause=False)       # shows the images
    # debugger.show_3d()                        # shows the figures
    return pred_3d.flatten()
def ctdet_post_process(dets, c, s, h, w, num_classes):
    # dets: batch x max_dets x dim
    # return 1-based class det dict
    ret = []
    for i in range(dets.shape[0]):
        top_preds = {}
        dets[i, :, :2] = transform_preds(dets[i, :, 0:2], c[i], s[i], (w, h))
        dets[i, :, 2:4] = transform_preds(dets[i, :, 2:4], c[i], s[i], (w, h))
        classes = dets[i, :, -1]
        for j in range(num_classes):
            inds = (classes == j)
            top_preds[j + 1] = np.concatenate([
                dets[i, inds, :4].astype(np.float32),
                dets[i, inds, 4:5].astype(np.float32)
            ], axis=1).tolist()
        ret.append(top_preds)
    return ret
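# --- Reference sketch (added) ----------------------------------------------
# A hedged usage example for ctdet_post_process. Shapes follow the CenterNet
# convention visible above: ctdet_decode returns [batch, K, 6] rows of
# [x1, y1, x2, y2, score, class] in feature-map coordinates. The meta keys
# ('c', 's', 'out_height', 'out_width') are illustrative assumptions, not
# guaranteed names from the original codebase.
def example_ctdet_postprocess(dets, meta, num_classes):
    dets = dets.detach().cpu().numpy()  # (batch, K, 6)
    results = ctdet_post_process(dets.copy(),
                                 meta['c'],           # per-image centers
                                 meta['s'],           # per-image scales
                                 meta['out_height'],  # feature-map height
                                 meta['out_width'],   # feature-map width
                                 num_classes)
    # results[i][j] is a list of [x1, y1, x2, y2, score] boxes for class j (1-based)
    return results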
def estimate(self, image):
    if isinstance(image, str):
        image = cv2.imread(image)
    inp, c, s = self.processImage(image)
    inp = torch.from_numpy(inp).to(self.device)
    out = self.model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (self.output_w, self.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    return pred, pred_3d
def demo_image(image, model, opt, name):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))  # pred 2d range (176, 256)
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    pred_3d_real_size = pred_3d * 4
    pred_3d_real_size[:, 0] = pred_3d_real_size[:, 0] - 40
    # print(pred_3d)
    # pdb.set_trace()

    pred_3d_ordered = np.zeros([15, 3])  # the last one as mid hip for spline compute
    for i in range(16):
        pred_3d_ordered[corres[i]] = pred_3d_real_size[i]
    pred_3d_ordered[1] = (pred_3d_ordered[2] + pred_3d_ordered[5]) / 2
    pred_3d_ordered[14] = (pred_3d_ordered[8] + pred_3d_ordered[11]) / 2
    pred_3d_ordered[0] = -1
    pred_3d_ordered[9:11] = -1
    pred_3d_ordered[12:14] = -1

    from good_order_cood_angle_convert import absolute_angles, anglelimbtoxyz2
    # bias: neck as the offset
    # if pred_3d[8, :][0] != 0 or pred_3d[8, :][1] != 0:
    #     bias = np.array([pred[8, 0], pred[8, 1]])
    # note: this assignment rebinds (shadows) the imported absolute_angles function
    absolute_angles, limbs, offset = absolute_angles(pred_3d_ordered)
    # pdb.set_trace()
    # rev = anglelimbtoxyz2(offset, absolute_angles, limbs)
    pred_2d = pred_3d_ordered[:, :2]
    dic = {'absolute_angles': absolute_angles,
           'limbs': limbs,
           'offset': offset}
    # pdb.set_trace()
    np.save(name, dic)
def show_det(dets, image, det_size, debugger, opt, pause=False, name=None):
    dets = dets.reshape(1, -1, dets.shape[2])
    h, w = image.shape[0:2]
    debugger.add_img(image, img_id='ctdet')
    c = np.array([w / 2, h / 2], dtype=np.float32)
    s = np.array([w, h], dtype=np.float32)
    dets[0, :, :2] = transform_preds(dets[0, :, 0:2], c, s, det_size)
    dets[0, :, 2:4] = transform_preds(dets[0, :, 2:4], c, s, det_size)
    classes = dets[0, :, -1]
    for j in range(opt.num_classes):
        inds = (classes == j)
        top_preds = dets[0, inds, :5].astype(np.float32)
        for bbox in top_preds:
            if bbox[4] > opt.vis_thresh:
                debugger.add_coco_bbox(bbox[:4], j, bbox[4], img_id='ctdet')
    if name:
        print('detecting:', name)
        debugger.save_all_imgs(path='./', prefix=name)
    else:
        debugger.show_all_imgs(pause=pause)
def convert_eval_format(self, pred, conf, meta):
    preds = np.zeros((pred.shape[0], pred.shape[1], 2))
    for i in range(pred.shape[0]):
        preds[i] = transform_preds(pred[i],
                                   meta['center'][i].numpy(),
                                   meta['scale'][i].numpy(),
                                   [self.opt.output_h, self.opt.output_w])
    ret = []
    for i in range(pred.shape[0]):
        kpts = np.concatenate([preds[i], conf[i]], axis=1).astype(
            np.int32).reshape(self.num_joints * 3).tolist()
        score = int(meta['score'][i])
        ret.append({'category_id': 1,
                    'image_id': int(meta['image_id'].numpy()),
                    'keypoints': kpts,
                    'score': score})
    return ret
def demo_image(image, model, opt):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    return image, pred, pred_3d
def predict(self, image):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0,
                                       [self.opt.input_w, self.opt.input_h])
    inp = cv2.warpAffine(image, trans_input,
                         (self.opt.input_w, self.opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(self.opt.device)
    out = self.model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (self.opt.output_w, self.opt.output_h))
    # pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
    #                        out['depth'].detach().cpu().numpy())[0]
    # Overlay points on top of the image:
    # return show_2d(image, pred, (255, 0, 0), mpii_edges)
    return image, pred, mpii_edges
def demo_image(image, model, opt, name):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)
    inp = torch.from_numpy(inp).to(opt.device)
    out = model(inp)[-1]
    pred = get_preds(out['hm'].detach().cpu().numpy())[0]
    pred = transform_preds(pred, c, s, (opt.output_w, opt.output_h))
    pred_3d = get_preds_3d(out['hm'].detach().cpu().numpy(),
                           out['depth'].detach().cpu().numpy())[0]
    if pred_3d[6, :][0] != 0 or pred_3d[6, :][1] != 0:
        print("A different bias!!")
        pdb.set_trace()
    bias = np.array([pred[6, 0], pred[6, 1]])
    dic = {'pred_3d': pred_3d, 'bias': bias}
    np.save(name, dic)
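# --- Reference sketch (added) ----------------------------------------------
# The demo_image variants above all repeat the same preprocessing: crop the
# image into a square of side max(h, w), warp it to the network input size,
# normalize, and lay it out as a 1xCxHxW tensor. A hedged helper consolidating
# that pattern; `mean`, `std`, and the `opt` fields are assumed to match the
# surrounding code, and `preprocess_for_pose` is an illustrative name.
import cv2
import numpy as np
import torch

def preprocess_for_pose(image, opt, mean, std):
    s = max(image.shape[0], image.shape[1]) * 1.0
    c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
    trans_input = get_affine_transform(c, s, 0, [opt.input_w, opt.input_h])
    inp = cv2.warpAffine(image, trans_input, (opt.input_w, opt.input_h),
                         flags=cv2.INTER_LINEAR)
    inp = (inp / 255. - mean) / std  # normalize to zero mean / unit std
    inp = inp.transpose(2, 0, 1)[np.newaxis, ...].astype(np.float32)  # HWC -> 1CHW
    return torch.from_numpy(inp).to(opt.device), c, s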
def val_map(epoch):
    print_log('\n Val@Epoch: %d' % epoch)
    model.eval()
    torch.cuda.empty_cache()
    max_per_image = 100

    results = {}
    speed_list = []
    with torch.no_grad():
        for inputs in val_loader:
            img_id, inputs = inputs[0]
            start_image_time = time.time()

            segmentations = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                # dict_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device, non_blocking=True)
                # dict_tensor.requires_grad = False
                # hmap, regs, w_h_, _, _, codes, offsets = model(inputs[scale]['image'])[-1]
                hmap, regs, w_h_, codes, offsets = model(inputs[scale]['image'])[-1]
                output = [hmap, regs, w_h_, codes, offsets]

                segms = ctsegm_inmodal_code_decode(
                    *output,
                    torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                top_preds = {}
                for j in range(cfg.n_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2],
                        inputs[scale]['center'], inputs[scale]['scale'],
                        (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2] = transform_preds(
                    segms[:, cfg.n_vertices * 2:cfg.n_vertices * 2 + 2],
                    inputs[scale]['center'], inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4] = transform_preds(
                    segms[:, cfg.n_vertices * 2 + 2:cfg.n_vertices * 2 + 4],
                    inputs[scale]['center'], inputs[scale]['scale'],
                    (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(val_dataset.num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.n_vertices * 2 + 5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.n_vertices * 2 + 4] /= scale

                segmentations.append(top_preds)

            end_image_time = time.time()
            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, val_dataset.num_classes + 1)}
            scores = np.hstack([segms_and_scores[j][:, cfg.n_vertices * 2 + 4]
                                for j in range(1, val_dataset.num_classes + 1)])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, val_dataset.num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.n_vertices * 2 + 4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            results[img_id] = segms_and_scores
            speed_list.append(end_image_time - start_image_time)

    eval_results = val_dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    print_log(eval_results)
    summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
    print_log('Average speed on val set:{:.2f}'.format(1. / np.mean(speed_list)))
    return eval_results[0]
def main():
    # Create Test set labels for DETRAC
    detrac_root = cfg.label_dir
    dataType = 'Test'
    test_images = list()
    test_objects = list()

    annotation_folder = 'DETRAC-{}-Annotations-XML'.format(dataType)
    annotation_path = os.path.join(detrac_root, annotation_folder)
    if not os.path.exists(annotation_path):
        print('annotation_path does not exist')
        raise FileNotFoundError

    label_file = os.path.join(annotation_path, cfg.video_name + '.xml')
    tree = ET.parse(label_file)
    root = tree.getroot()
    object_list = list()
    Box_dict = {}
    for obj in root.iter('frame'):
        boxes = list()
        frame_num = int(obj.attrib['num'])
        target_list = obj.find('target_list')
        for target in target_list:
            bbox = target.find('box').attrib
            left = float(bbox['left'])
            top = float(bbox['top'])
            width = float(bbox['width'])
            height = float(bbox['height'])
            boxes.append([left, top, left + width, top + height])  # x1, y1, x2, y2
        Box_dict[frame_num] = boxes

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 150
    num_classes = 80 if cfg.dataset == 'coco' else 4
    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting video
    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps

    # output video configuration
    video_out = cv2.VideoWriter(
        os.path.join(cfg.root_dir, cfg.video_name + '_compare.mkv'),
        cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, (width, height))

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(os.path.join(cfg.img_dir, cfg.video_name)))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_n = frame_id + 1
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, cfg.video_name, frame_name)
        image = cv2.imread(image_path)
        original_image = image.copy()

        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                            dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            detections = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                output = model(imgs[scale]['image'])[-1]

                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              imgs[scale]['center'],
                                              imgs[scale]['scale'],
                                              (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               imgs[scale]['center'],
                                               imgs[scale]['scale'],
                                               (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            speed_list.append(time.time() - start_time)
            output_image = original_image

            # Plot the GT boxes
            gt_bboxes = Box_dict[frame_n]
            for rect in gt_bboxes:
                x1, y1, x2, y2 = float(rect[0]), float(rect[1]), float(rect[2]), float(rect[3])
                cv2.rectangle(output_image, pt1=(int(x1), int(y1)), pt2=(int(x2), int(y2)),
                              color=(0, 255, 0), thickness=2)

            counter = 1
            for lab in bbox_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in DETRAC_compatible_names:
                        continue
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    if score > cfg.detect_thres:
                        text = names[lab] + '%.2f' % score
                        label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                        text_location = [x1 + 2, y1 + 2,
                                         x1 + 2 + label_size[0][0],
                                         y1 + 2 + label_size[0][1]]
                        cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 0, 255), thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=(0, 0, 255))

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100

    image = cv2.imread(cfg.img_dir)
    # orig_image = image
    height, width = image.shape[0:2]
    padding = 127 if 'hourglass' in cfg.arch else 31
    imgs = {}
    for scale in cfg.test_scales:
        new_height = int(height * scale)
        new_width = int(width * scale)

        if cfg.img_size > 0:
            img_height, img_width = cfg.img_size, cfg.img_size
            center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
            scaled_size = max(height, width) * 1.0
            scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
        else:
            img_height = (new_height | padding) + 1
            img_width = (new_width | padding) + 1
            center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
            scaled_size = np.array([img_width, img_height], dtype=np.float32)

        img = cv2.resize(image, (new_width, new_height))
        trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
        img = cv2.warpAffine(img, trans_img, (img_width, img_height))

        img = img.astype(np.float32) / 255.
        img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else VOC_MEAN,
                        dtype=np.float32)[None, None, :]
        img /= np.array(COCO_STD if cfg.dataset == 'coco' else VOC_STD,
                        dtype=np.float32)[None, None, :]
        img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

        if cfg.test_flip:
            img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

        imgs[scale] = {'image': torch.from_numpy(img).float(),
                       'center': np.array(center),
                       'scale': np.array(scaled_size),
                       'fmap_h': np.array(img_height // 4),
                       'fmap_w': np.array(img_width // 4)}

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=80 if cfg.dataset == 'coco' else 20)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    with torch.no_grad():
        detections = []
        for scale in imgs:
            imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
            output = model(imgs[scale]['image'])[-1]

            dets = ctdet_decode(*output, K=cfg.test_topk)
            dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

            top_preds = {}
            dets[:, :2] = transform_preds(dets[:, 0:2],
                                          imgs[scale]['center'],
                                          imgs[scale]['scale'],
                                          (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
            dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                           imgs[scale]['center'],
                                           imgs[scale]['scale'],
                                           (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
            cls = dets[:, -1]
            for j in range(80):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale

            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, 81 if cfg.dataset == 'coco' else 21):
            bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
            if len(cfg.test_scales) > 1:
                soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack([bbox_and_scores[j][:, 4]
                            for j in range(1, 81 if cfg.dataset == 'coco' else 21)])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, 81 if cfg.dataset == 'coco' else 21):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        # plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # plt.show()
        fig = plt.figure(0)
        colors = COCO_COLORS if cfg.dataset == 'coco' else VOC_COLORS
        names = COCO_NAMES if cfg.dataset == 'coco' else VOC_NAMES
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        for lab in bbox_and_scores:
            for boxes in bbox_and_scores[lab]:
                x1, y1, x2, y2, score = boxes
                if score > 0.3:
                    plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1,
                                                  linewidth=2,
                                                  edgecolor=colors[lab],
                                                  facecolor='none'))
                    plt.text(x1 + 3, y1 + 3, names[lab] + '%.2f' % score,
                             bbox=dict(facecolor=colors[lab], alpha=0.5),
                             fontsize=7, color='k')

        fig.patch.set_visible(False)
        plt.axis('off')
        plt.savefig('data/demo_results.png', dpi=300, transparent=True)
        plt.show()
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]), num_classes=80)
        # raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    catIds = coco.getCatIds(catNms=nms)
    # imgIds = coco.getImgIds(catIds=catIds)
    imgIds = coco.getImgIds()
    # annIds = coco.getAnnIds(catIds=catIds)
    # all_anns = coco.loadAnns(ids=annIds)
    # print(len(imgIds), imgIds)

    for id in imgIds:
        annt_ids = coco.getAnnIds(imgIds=[id])
        annotations_per_img = coco.loadAnns(ids=annt_ids)
        # print('All annots: ', len(annotations_per_img), annotations_per_img)
        img = coco.loadImgs(id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type, img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        img_original = cv2.imread(image_path)
        img_connect = cv2.imread(image_path)
        img_recon = cv2.imread(image_path)
        print('Image id: ', id)

        for annt in annotations_per_img:
            if annt['iscrowd'] == 1 or type(annt['segmentation']) != list:
                continue

            polygons = get_connected_polygon_using_mask(
                annt['segmentation'], (h_img, w_img),
                n_vertices=cfg.num_vertices, closing_max_kernel=60)
            gt_bbox = annt['bbox']
            gt_x1, gt_y1, gt_w, gt_h = gt_bbox
            contour = np.array(polygons).reshape((-1, 2))

            # Downsample the contour to a fixed number of vertices
            if len(contour) > cfg.num_vertices:
                resampled_contour = resample(contour, num=cfg.num_vertices)
            else:
                resampled_contour = turning_angle_resample(contour, cfg.num_vertices)

            resampled_contour[:, 0] = np.clip(resampled_contour[:, 0], gt_x1, gt_x1 + gt_w)
            resampled_contour[:, 1] = np.clip(resampled_contour[:, 1], gt_y1, gt_y1 + gt_h)

            clockwise_flag = check_clockwise_polygon(resampled_contour)
            if not clockwise_flag:
                fixed_contour = np.flip(resampled_contour, axis=0)
            else:
                fixed_contour = resampled_contour.copy()

            # Indexing from the left-most vertex (argmin along the x-axis)
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]),
                                           axis=0)

            x1, y1, x2, y2 = gt_x1, gt_y1, gt_x1 + gt_w, gt_y1 + gt_h
            # bbox_width, bbox_height = x2 - x1, y2 - y1
            # bbox = [x1, y1, bbox_width, bbox_height]
            # bbox_center = np.array([(x1 + x2) / 2., (y1 + y2) / 2.])
            bbox_center = np.mean(indexed_shape, axis=0)
            centered_shape = indexed_shape - bbox_center

            # visualize resampled points with multiple parts in image side by side
            for cnt in range(len(annt['segmentation'])):
                polys = np.array(annt['segmentation'][cnt]).reshape((-1, 2))
                cv2.polylines(img_original, [polys.astype(np.int32)], True,
                              (10, 10, 255), thickness=2)
                # cv2.drawContours(img_original, [polys.astype(np.int32)], contourIdx=-1,
                #                  color=(10, 10, 255), thickness=-1)

            cv2.polylines(img_connect, [indexed_shape.astype(np.int32)], True,
                          (10, 10, 255), thickness=2)
            # cv2.drawContours(img_connect, [indexed_shape.astype(np.int32)], contourIdx=-1,
            #                  color=(10, 10, 255), thickness=-1)

            learned_val_codes, _ = fast_ista(centered_shape.reshape((1, -1)),
                                             dictionary, lmbda=0.1, max_iter=60)
            recon_contour = np.matmul(learned_val_codes, dictionary).reshape((-1, 2))
            recon_contour = recon_contour + bbox_center
            cv2.polylines(img_recon, [recon_contour.astype(np.int32)], True,
                          (10, 10, 255), thickness=2)
            # cv2.drawContours(img_recon, [recon_contour.astype(np.int32)], contourIdx=-1,
            #                  color=(10, 10, 255), thickness=-1)

        # plot gt mean and std (debug visualization, kept for reference)
        # image = cv2.imread(image_path)
        # cv2.ellipse(image, center=(int(contour_mean[0]), int(contour_mean[1])),
        #             axes=(int(contour_std[0]), int(contour_std[1])),
        #             angle=0, startAngle=0, endAngle=360, color=(0, 255, 0), thickness=2)
        # cv2.rectangle(image,
        #               pt1=(int(contour_mean[0] - contour_std[0] / 2.), int(contour_mean[1] - contour_std[1] / 2.)),
        #               pt2=(int(contour_mean[0] + contour_std[0] / 2.), int(contour_mean[1] + contour_std[1] / 2.)),
        #               color=(0, 255, 0), thickness=2)
        # cv2.polylines(image, [fixed_contour.astype(np.int32)], True, (0, 0, 255))
        # cv2.rectangle(image, pt1=(int(min(fixed_contour[:, 0])), int(min(fixed_contour[:, 1]))),
        #               pt2=(int(max(fixed_contour[:, 0])), int(max(fixed_contour[:, 1]))),
        #               color=(255, 0, 0), thickness=2)
        # cv2.imshow('GT segments', image)
        # if cv2.waitKey() & 0xFF == ord('q'):
        #     break

        image = cv2.imread(image_path)
        original_image = image.copy()

        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                            dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                output = model(imgs[scale]['image'])[-1]
                # segms, codes_ = ctsegm_scaled_decode_debug(
                #     *output,
                #     torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                #     K=cfg.test_topk)
                segms = ctsegm_code_n_offset_decode(
                    *output,
                    torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]
                # codes_ = codes_.detach().cpu().numpy().reshape(1, -1, codes_.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2],
                        imgs[scale]['center'], imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2] = transform_preds(
                    segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                    imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4] = transform_preds(
                    segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4],
                    imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 + 5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale
                    # code_preds[j + 1] = codes_[inds, :]

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, num_classes + 1)}  # a Dict label: segments
            # codes_and_scores = {j: np.concatenate([d[j] for d in predicted_codes], axis=0)
            #                     for j in range(1, num_classes + 1)}  # a Dict label: codes
            scores = np.hstack([segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                                for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 + 4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    # codes_and_scores[j] = codes_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)
            # print(blend_mask.shape)

            for lab in segms_and_scores:
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    # c_ = codes_and_scores[lab][idx]
                    # for res in segms_and_scores[lab]:
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, w_img)
                    bbox[1] = np.clip(bbox[1], 0, h_img)
                    bbox[2] = np.clip(bbox[2], 0, w_img)
                    bbox[3] = np.clip(bbox[3], 0, h_img)
                    if score > cfg.detect_thres:
                        text = names[lab]  # + ' %.2f' % score
                        # label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX,
                        #                              thickness=2, fontScale=0.5)
                        polygon = contour.reshape((-1, 2))
                        # print('Shape: Poly -- ', polygon.shape)
                        # print(polygon)
                        polygon[:, 0] = np.clip(polygon[:, 0], 0, w_img - 1)
                        polygon[:, 1] = np.clip(polygon[:, 1], 0, h_img - 1)

                        # use bb tools to draw predictions
                        color = random.choice(COLOR_WORLD)
                        bb.add(output_image, bbox[0], bbox[1], bbox[2], bbox[3], text, color)
                        cv2.polylines(output_image, [polygon.astype(np.int32)], True,
                                      RGB_DICT[color], thickness=1)
                        cv2.drawContours(blend_mask, [polygon.astype(np.int32)],
                                         contourIdx=-1, color=RGB_DICT[color], thickness=-1)

                        # color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                        # contour_mean = np.mean(polygon, axis=0)
                        # contour_std = np.std(polygon, axis=0)
                        # center_x, center_y = np.mean(polygon, axis=0).astype(np.int32)
                        # text_location = [bbox[0] + 1, bbox[1] + 1,
                        #                  bbox[1] + label_size[0][0] + 1,
                        #                  bbox[0] + label_size[0][1] + 1]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])), color=color, thickness=1)
                        # cv2.rectangle(output_image,
                        #               pt1=(int(np.min(polygon[:, 0])), int(np.min(polygon[:, 1]))),
                        #               pt2=(int(np.max(polygon[:, 0])), int(np.max(polygon[:, 1]))),
                        #               color=(0, 255, 0), thickness=1)
                        # cv2.polylines(output_image, [polygon.astype(np.int32)], True, color, thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=2, fontScale=0.5,
                        #             color=(255, 0, 0))
                        # cv2.putText(output_image, text, org=(int(bbox[0]), int(bbox[1])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.5,
                        #             color=color)

                        # show the histogram for predicted codes
                        # fig = plt.figure()
                        # plt.plot(np.arange(cfg.n_codes), c_.reshape((-1,)), color='green',
                        #          marker='o', linestyle='dashed', linewidth=2, markersize=6)
                        # plt.ylabel('Value of each coefficient')
                        # plt.xlabel('All predicted {} coefficients'.format(cfg.n_codes))
                        # plt.title('Distribution of the predicted coefficients for {}'.format(text))
                        # plt.show()

            value = [255, 255, 255]
            dst_img = cv2.addWeighted(output_image, 0.5, blend_mask, 0.5, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]

            img_original = cv2.copyMakeBorder(img_original, 0, 0, 0, 10,
                                              cv2.BORDER_CONSTANT, None, value)
            img_connect = cv2.copyMakeBorder(img_connect, 0, 0, 10, 10,
                                             cv2.BORDER_CONSTANT, None, value)
            img_recon = cv2.copyMakeBorder(img_recon, 0, 0, 10, 10,
                                           cv2.BORDER_CONSTANT, None, value)
            dst_img = cv2.copyMakeBorder(dst_img, 0, 0, 10, 0,
                                         cv2.BORDER_CONSTANT, None, value)
            im_cat = np.concatenate((img_original, img_connect, img_recon, dst_img), axis=1)
            # im_cat = np.concatenate((img_original, img_connect, img_recon), axis=1)
            cv2.imshow('GT:Resample:Recons:Predict', im_cat)
            if cv2.waitKey() & 0xFF == ord('q'):
                break
def main():
    cfg = get_cfg()
    max_per_image = 100
    num_classes = cfg.num_classes

    print('Loading model...')
    model_name = '%s_hc%s' % (cfg.arch, cfg.head_conv)
    model, shift_buffer = load_network_arch(cfg.arch, cfg.num_classes,
                                            cfg.head_conv, pretrained=False)
    model = load_model(model, cfg.model_path, is_nested=False, map_location='cpu')
    model = model.to(cfg.device)
    model.eval()

    debugger = Debugger(dataset=cfg.dataset, ipynb=False, theme='black')
    all_inputs = [load_and_transform_image(cfg.fn_image, cfg.img_size)]

    results = {}
    with torch.no_grad():
        img_id, inputs = all_inputs[0]
        detections = []
        for scale in [1.]:
            img_numpy = inputs[scale]['image']
            img = torch.from_numpy(img_numpy).to(cfg.device)
            output = model(img)[-1]  # array of 3

            dets = ctdet_decode(*output, K=cfg.test_topk)  # torch.Size([1, 100, 6])
            dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]  # (100, 6)

            # debug img uses dets prior to post_process
            add_debug_image(debugger, img_numpy, dets, output, scale)

            # print('meta: ', inputs[scale]['center'], inputs[scale]['scale'],
            #       inputs[scale]['fmap_w'], inputs[scale]['fmap_h'])
            dets[:, :2] = transform_preds(dets[:, 0:2],
                                          inputs[scale]['center'],
                                          inputs[scale]['scale'],
                                          (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
            dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                           inputs[scale]['center'],
                                           inputs[scale]['scale'],
                                           (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
            # print('dets post_proc: ', dets)
            # MNV3:     [[117.8218, 132.52121, 227.10435, 351.23346, 0.854211, 14.]]
            # resnet18: [[115.41386, 133.93118, 230.14862, 356.79816, 0.90593797]]
            cls = dets[:, -1]  # (100,)
            top_preds = {}
            for j in range(num_classes):
                inds = (cls == j)
                top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                top_preds[j + 1][:, :4] /= scale
            detections.append(top_preds)

        bbox_and_scores = {}
        for j in range(1, num_classes + 1):
            bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
            # if len(dataset.test_scales) > 1:
            #     soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
        scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

        results[img_id] = bbox_and_scores
        # print('bbox_and_scores: ', bbox_and_scores)
        # show_results(debugger, image, results)
        debugger.show_all_imgs(pause=True)
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    if cfg.dataset == 'coco':
        num_classes = 80
        colors = COCO_COLORS
        names = COCO_NAMES
    elif cfg.dataset == 'DETRAC':
        num_classes = 3
        colors = DETRAC_COLORS
        names = DETRAC_NAMES
    elif cfg.dataset == 'kins':
        num_classes = 7
        colors = KINS_COLORS
        names = KINS_NAMES
    else:
        print('Please specify a correct dataset name.')
        raise NotImplementedError

    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting video
    output_folder = os.path.join(cfg.root_dir, 'demo')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps

    # output video configuration
    video_out = cv2.VideoWriter(os.path.join(output_folder, cfg.output_video_file),
                                cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'),
                                fps, (width, height))
    text_out = open(os.path.join(output_folder, cfg.output_text_file), 'w')
    dictionary = np.load(cfg.dictionary_file)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]),
                                head_conv=64,
                                num_classes=num_classes,
                                num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(cfg.img_dir))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, frame_name)
        image = cv2.imread(image_path)
        original_image = image.copy()

        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size[0] > 0 and cfg.img_size[1] > 0:
                img_height, img_width = cfg.img_size[0], cfg.img_size[1]
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                            dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                hmap, regs, w_h_, offsets, _, _, codes = model(imgs[scale]['image'])[-1]
                output = [hmap, regs, w_h_, codes, offsets]

                segms = ctsegm_scale_decode(
                    *output,
                    torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2],
                        imgs[scale]['center'], imgs[scale]['scale'],
                        (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2] = transform_preds(
                    segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                    imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4] = transform_preds(
                    segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4],
                    imgs[scale]['center'], imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 + 5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack([segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                                for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 + 4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    # codes_and_scores[j] = codes_and_scores[j][keep_inds]

            # record per-frame inference time for the frame-rate report below
            speed_list.append(time.time() - start_time)

            # Use opencv functions to output a video
            output_image = original_image
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)

            counter = 1
            for lab in segms_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in display_cat and cfg.dataset != 'kins':
                        continue
                for idx in range(len(segms_and_scores[lab])):
                    res = segms_and_scores[lab][idx]
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    bbox[0] = np.clip(bbox[0], 0, width - 1)
                    bbox[1] = np.clip(bbox[1], 0, height - 1)
                    bbox[2] = np.clip(bbox[2], 0, width - 1)
                    bbox[3] = np.clip(bbox[3], 0, height - 1)
                    polygon = contour.reshape((-1, 2))
                    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)
                    if score > cfg.detect_thres:
                        text = names[lab] + ' %.2f' % score
                        label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                        text_location = [int(bbox[0]) + 2, int(bbox[1]) + 2,
                                         int(bbox[0]) + 2 + label_size[0][0],
                                         int(bbox[1]) + 2 + label_size[0][1]]
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=colors[lab], thickness=2)
                        # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                        #               pt2=(int(bbox[2]), int(bbox[3])),
                        #               color=nice_colors[names[lab]], thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=nice_colors[names[lab]])
                        cv2.polylines(output_image, [polygon.astype(np.int32)], True,
                                      color=nice_colors[names[lab]], thickness=2)
                        cv2.drawContours(blend_mask, [polygon.astype(np.int32)],
                                         contourIdx=-1, color=nice_colors[names[lab]],
                                         thickness=-1)

                        # add to text file
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
                            str(frame_id + 1), counter,
                            int(bbox[0]), int(bbox[1]),
                            int(bbox[2]) - int(bbox[0]),
                            int(bbox[3]) - int(bbox[1]), score)
                        counter += 1
                        text_out.write(new_line)

            dst_img = cv2.addWeighted(output_image, 0.4, blend_mask, 0.6, 0)
            dst_img[blend_mask == 0] = output_image[blend_mask == 0]
            output_image = dst_img

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    # Set up parameters for outputting video
    output_name = 'demo/'
    width = cfg.video_width
    height = cfg.video_height
    fps = cfg.video_fps

    # output video configuration
    video_out = cv2.VideoWriter(cfg.output_video_dir,
                                cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'),
                                fps, (width, height))
    text_out = open(cfg.output_text_dir, 'w')

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading images
    speed_list = []
    frame_list = sorted(os.listdir(cfg.img_dir))
    n_frames = len(frame_list)

    for frame_id in range(n_frames):
        frame_name = frame_list[frame_id]
        image_path = os.path.join(cfg.img_dir, frame_name)
        image = cv2.imread(image_path)
        original_image = image.copy()

        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                            dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            detections = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                output = model(imgs[scale]['image'])[-1]

                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              imgs[scale]['center'],
                                              imgs[scale]['scale'],
                                              (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               imgs[scale]['center'],
                                               imgs[scale]['scale'],
                                               (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Use opencv functions to output a video
            # output_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
            speed_list.append(time.time() - start_time)
            output_image = original_image

            counter = 1
            for lab in bbox_and_scores:
                if cfg.dataset == 'coco':
                    if names[lab] not in DETRAC_compatible_names:
                        continue
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    if score > cfg.detect_thres:
                        text = names[lab] + '%.2f' % score
                        label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                        text_location = [x1 + 2, y1 + 2,
                                         x1 + 2 + label_size[0][0],
                                         y1 + 2 + label_size[0][1]]
                        # cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                        #               pt2=(int(x2), int(y2)),
                        #               color=colors[lab], thickness=2)
                        cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 255, 0), thickness=2)
                        # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                        #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                        #             color=(0, 0, 255))

                        # add to text file
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
                            str(frame_id + 1), counter, x1, y1, x2 - x1, y2 - y1, score)
                        counter += 1
                        text_out.write(new_line)

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break

    print('Test frame rate:', 1. / np.mean(speed_list))
def Evaluate(epoch, model):
    print('\n Evaluate@Epoch: %d' % epoch)
    # time.clock() was removed in Python 3.8; use perf_counter() instead
    start_time = time.perf_counter()
    print('Start time %s Seconds' % start_time)
    model.eval()
    torch.cuda.empty_cache()
    max_per_image = 100

    results = {}
    with torch.no_grad():
        for inputs in data_loader:
            img_id, inputs, img_path = inputs[0]

            detections = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)  # (1, 3)

                output = model(inputs[scale]['image'])[-1]  # hmap, regs, pxpy
                dets = ctdet_decode(*output, K=cfg.test_topk)  # torch.cat([bboxes, scores, clses], dim=2)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              inputs[scale]['center'],
                                              inputs[scale]['scale'],
                                              (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               inputs[scale]['center'],
                                               inputs[scale]['scale'],
                                               (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                clses = dets[:, -1]
                for j in range(dataset.num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {j: np.concatenate([d[j] for d in detections], axis=0)
                               for j in range(1, dataset.num_classes + 1)}
            scores = np.hstack([bbox_and_scores[j][:, 4]
                                for j in range(1, dataset.num_classes + 1)])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            results[img_id] = bbox_and_scores

    end_time = time.perf_counter()
    eval_results = dataset.run_eval(results, save_dir=cfg.ckpt_dir)
    print(eval_results)
    print('End time %s Seconds' % end_time)
    Run_time = end_time - start_time
    FPS = 100 / Run_time  # replace 100 with the number of images
    print('FPS %s ' % FPS)
    # summary_writer.add_scalar('Evaluate_mAP/mAP', eval_results[0], epoch)
    return eval_results[0]
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]),
                                head_conv=64,
                                num_classes=num_classes,
                                num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    if 'train' in cfg.data_type:
        annotation_file = '{}/annotations/instances_train2017.json'.format(cfg.data_dir)
        cfg.data_type = 'train2017'
    elif 'test' in cfg.data_type:
        annotation_file = '{}/annotations/image_info_test-dev2017.json'.format(cfg.data_dir)
        cfg.data_type = 'test2017'
    else:
        annotation_file = '{}/annotations/instances_val2017.json'.format(cfg.data_dir)
        cfg.data_type = 'val2017'
    coco = COCO(annotation_file)

    # Load all annotations
    # cats = coco.loadCats(coco.getCatIds())
    # nms = [cat['name'] for cat in cats]
    # catIds = coco.getCatIds(catNms=nms)
    # imgIds = np.sort(coco.getImgIds()).tolist()
    imgIds = coco.getImgIds()
    # annIds = coco.getAnnIds(catIds=catIds)
    # all_anns = coco.loadAnns(ids=annIds)

    for img_id in imgIds:
        img = coco.loadImgs(img_id)[0]
        image_path = '%s/coco/%s/%s' % (cfg.data_dir, cfg.data_type, img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        ann_ids = coco.getAnnIds(imgIds=img_id)
        gt_anns = coco.loadAnns(ids=ann_ids)

        # plot gt mean and std
        # image = cv2.imread(image_path)
        # # cv2.ellipse(image, center=(int(contour_mean[0]), int(contour_mean[1])),
        # #             axes=(int(contour_std[0]), int(contour_std[1])),
        # #             angle=0, startAngle=0, endAngle=360, color=(0, 255, 0),
        # #             thickness=2)
        # cv2.rectangle(image, pt1=(int(contour_mean[0] - contour_std[0] / 2.), int(contour_mean[1] - contour_std[1] / 2.)),
        #               pt2=(int(contour_mean[0] + contour_std[0] / 2.), int(contour_mean[1] + contour_std[1] / 2.)),
        #               color=(0, 255, 0), thickness=2)
        # cv2.polylines(image, [fixed_contour.astype(np.int32)], True, (0, 0, 255))
        # cv2.rectangle(image, pt1=(int(min(fixed_contour[:, 0])), int(min(fixed_contour[:, 1]))),
        #               pt2=(int(max(fixed_contour[:, 0])), int(max(fixed_contour[:, 1]))),
        #               color=(255, 0, 0), thickness=2)
        # cv2.imshow('GT segments', image)
        # if cv2.waitKey() & 0xFF == ord('q'):
        #     break

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            continue
        print('Loading image of id:', img_id)

        # plotting the groundtruth
        gt_image = image.copy()
        gt_blend_mask = np.zeros(shape=gt_image.shape, dtype=np.uint8)
        for ann_ in gt_anns:
            if ann_['iscrowd'] == 1:
                continue
            polygons_ = ann_['segmentation']
            use_color_key = COLOR_WORLD[random.randint(1, len(COLOR_WORLD)) - 1]
            for poly in polygons_:
                poly = np.array(poly).reshape((-1, 2))
                cv2.polylines(gt_image, [poly.astype(np.int32)], True,
                              color=switch_tuple(RGB_DICT[use_color_key]),
                              thickness=2)
                cv2.drawContours(gt_blend_mask, [poly.astype(np.int32)],
                                 contourIdx=-1,
                                 color=switch_tuple(RGB_DICT[use_color_key]),
                                 thickness=-1)

        original_image = image.copy()
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31

        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                            dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            segmentations = []
            predicted_codes = []
            start_time = time.time()
            print('Start running model ......')
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                hmap, regs, w_h_, _, _, codes, offsets = model(imgs[scale]['image'])[-1]
                output = [hmap, regs, w_h_, codes, offsets]

                segms = ctsegm_scale_decode(*output,
                                            torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                                            K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                top_preds = {}
                code_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(segms[:, 2 * j:2 * j + 2],
                                                                imgs[scale]['center'],
                                                                imgs[scale]['scale'],
                                                                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2] = transform_preds(
                    segms[:, cfg.num_vertices * 2:cfg.num_vertices * 2 + 2],
                    imgs[scale]['center'],
                    imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4] = transform_preds(
                    segms[:, cfg.num_vertices * 2 + 2:cfg.num_vertices * 2 + 4],
                    imgs[scale]['center'],
                    imgs[scale]['scale'],
                    (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))

                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 + 5].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2 + 4] /= scale

                segmentations.append(top_preds)
                predicted_codes.append(code_preds)

            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack([segms_and_scores[j][:, cfg.num_vertices * 2 + 4]
                                for j in range(1, num_classes + 1)])
            print('Image processing time {:.4f} sec, preparing output image ......'.format(time.time() - start_time))

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2 + 4] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

        # Use opencv functions to output
        output_image = original_image
        blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)

        counter = 1
        for lab in segms_and_scores:
            # if cfg.dataset == 'coco':
            #     if names[lab] not in display_cat and cfg.dataset != 'kins':
            #         continue
            for idx in range(len(segms_and_scores[lab])):
                res = segms_and_scores[lab][idx]
                contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                bbox[0] = np.clip(bbox[0], 0, width - 1)
                bbox[1] = np.clip(bbox[1], 0, height - 1)
                bbox[2] = np.clip(bbox[2], 0, width - 1)
                bbox[3] = np.clip(bbox[3], 0, height - 1)

                polygon = contour.reshape((-1, 2))
                polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)

                if score > cfg.detect_thres:
                    # text = names[lab] + ' %.2f' % score
                    # label_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_COMPLEX, 0.3, 1)
                    # text_location = [int(bbox[0]) + 2, int(bbox[1]) + 2,
                    #                  int(bbox[0]) + 2 + label_size[0][0],
                    #                  int(bbox[1]) + 2 + label_size[0][1]]
                    # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                    #               pt2=(int(bbox[2]), int(bbox[3])),
                    #               color=colors[lab], thickness=2)
                    # cv2.rectangle(output_image, pt1=(int(bbox[0]), int(bbox[1])),
                    #               pt2=(int(bbox[2]), int(bbox[3])),
                    #               color=nice_colors[names[lab]], thickness=2)
                    # cv2.putText(output_image, text, org=(int(text_location[0]), int(text_location[3])),
                    #             fontFace=cv2.FONT_HERSHEY_COMPLEX, thickness=1, fontScale=0.3,
                    #             color=nice_colors[names[lab]])

                    use_color_key = COLOR_WORLD[random.randint(1, len(COLOR_WORLD)) - 1]
                    cv2.polylines(output_image, [polygon.astype(np.int32)], True,
                                  color=switch_tuple(RGB_DICT[use_color_key]),
                                  thickness=2)
                    cv2.drawContours(blend_mask, [polygon.astype(np.int32)],
                                     contourIdx=-1,
                                     color=switch_tuple(RGB_DICT[use_color_key]),
                                     thickness=-1)
                    counter += 1

        dst_img = cv2.addWeighted(output_image, 0.4, blend_mask, 0.6, 0)
        dst_img[blend_mask == 0] = output_image[blend_mask == 0]
        gt_dst_img = cv2.addWeighted(gt_image, 0.4, gt_blend_mask, 0.6, 0)
        gt_dst_img[gt_blend_mask == 0] = gt_image[gt_blend_mask == 0]

        cat_image = np.concatenate([dst_img, gt_dst_img], axis=1)
        cv2.imshow('Frames', cat_image)
        if cv2.waitKey() & 0xFF == ord('q'):
            break
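# --- Illustrative sketch (not from the original scripts) ---
# The demo above renders translucent instance masks by alpha-blending a
# filled-contour canvas into the frame, then restoring every pixel the mask
# did not touch so the background stays crisp. Reproduced on synthetic data:
import numpy as np
import cv2

frame = np.full((64, 64, 3), 200, dtype=np.uint8)       # plain gray frame
blend_mask = np.zeros_like(frame)
cv2.circle(blend_mask, (32, 32), 16, (0, 0, 255), -1)   # one filled "instance"

overlay = cv2.addWeighted(frame, 0.4, blend_mask, 0.6, 0)
overlay[blend_mask == 0] = frame[blend_mask == 0]       # keep unmasked pixels intact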
def main():
    logger = create_logger(save_dir=cfg.log_dir)
    print = logger.info
    print(cfg)

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100

    Dataset_eval = Damage_eval  # your own dataset: Crack, RE, Spalling
    dataset = Dataset_eval(cfg.data_dir, split='val',
                           test_scales=cfg.test_scales,
                           test_flip=cfg.test_flip)  # split test
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True,
                                              collate_fn=dataset.collate_fn)

    print('Creating model...')
    if 'hourglass' in cfg.arch:
        model = get_hourglass[cfg.arch]
    elif 'resdcn' in cfg.arch:
        model = get_pose_net_resdcn(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet':
        model = get_pose_net(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'res_CBAM':
        model = get_pose_net_resnet_CBAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_PAM':
        model = get_pose_net_resnet_PAM(num_layers=18, head_conv=64, num_classes=3)
    elif cfg.arch == 'resnet_SE':
        model = get_pose_net_resnet_SE(num_layers=18, head_conv=64, num_classes=3)
    else:
        raise NotImplementedError

    model = load_model(model, cfg.pretrain_dir)
    model = model.to(cfg.device)
    model.eval()

    results = {}
    with torch.no_grad():
        for inputs in tqdm(data_loader):
            img_id, inputs, img_path = inputs[0]
            print('id %s', img_id)

            detections = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                output = model(inputs[scale]['image'])[-1]
                dets = ctdet_decode(*output, K=cfg.test_topk)
                dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                top_preds = {}
                dets[:, :2] = transform_preds(dets[:, 0:2],
                                              inputs[scale]['center'],
                                              inputs[scale]['scale'],
                                              (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                               inputs[scale]['center'],
                                               inputs[scale]['scale'],
                                               (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                cls = dets[:, -1]
                for j in range(dataset.num_classes):
                    inds = (cls == j)
                    top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                    top_preds[j + 1][:, :4] /= scale

                detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, dataset.num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(dataset.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4]
                                for j in range(1, dataset.num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, dataset.num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            images_test = cv2.imread(img_path)
            fig = plt.figure(0)
            colors = COCO_COLORS
            names = COCO_NAMES
            # cv2.imwrite('E:/test1.png', images_test)
            plt.imshow(cv2.cvtColor(images_test, cv2.COLOR_BGR2RGB))

            for lab in bbox_and_scores:
                for boxes in bbox_and_scores[lab]:
                    x1, y1, x2, y2, score = boxes
                    # clamp boxes to the 512 x 512 canvas
                    if x1 < 0:
                        x1 = 0
                    if y1 < 0:
                        y1 = 0
                    if x2 > 511:
                        x2 = 511
                    if y2 > 511:
                        y2 = 511
                    if score > 0.2:
                        plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1,
                                                      linewidth=2,
                                                      edgecolor=colors[lab],
                                                      facecolor='none'))
                        plt.text(x1 - 12, y1 - 12, names[lab],
                                 bbox=dict(facecolor=colors[lab], alpha=0.5),
                                 fontsize=7, color='k')

            fig.patch.set_visible(False)
            Save_dir = 'data/damage/Predict_images'  # save images
            Image_name = img_path[-10:]
            Save_dir = os.path.join(Save_dir, Image_name)
            plt.axis('off')
            plt.savefig(Save_dir, dpi=400, transparent=True,
                        bbox_inches="tight", pad_inches=0.1)  # save figure
            plt.close(0)

            results[img_id] = bbox_and_scores

    eval_results = dataset.run_eval(results, cfg.ckpt_dir)
    print(eval_results)
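# --- Illustrative sketch (not from the original scripts) ---
# The if-chains above clamp each box to the fixed 512 x 512 canvas one
# coordinate at a time; np.clip expresses the same semantics for a whole
# array of boxes in two vectorized calls (same fixed canvas size assumed):
import numpy as np

boxes = np.array([[-5.0, 10.0, 600.0, 520.0, 0.9]])   # x1, y1, x2, y2, score
boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, 511)  # clamp x1, x2
boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, 511)  # clamp y1, y2
print(boxes)  # [[  0.   10.  511.  511.    0.9]]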
def main():
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        col_ = [c * 255 for c in colors[j]]
        colors[j] = tuple(col_)

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    imgIds = coco.getImgIds()
    det_results = []
    seg_results = []

    for img_id in imgIds:
        img = coco.loadImgs(img_id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type, img['file_name'])
        w_img = int(img['width'])
        h_img = int(img['height'])
        if w_img < 1 or h_img < 1:
            continue

        image = cv2.imread(image_path)
        height, width = image.shape[0:2]
        padding = 127 if 'hourglass' in cfg.arch else 31

        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                            dtype=np.float32)[None, None, :]
            img /= np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                            dtype=np.float32)[None, None, :]
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            # if cfg.test_flip:
            #     img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        with torch.no_grad():
            # print('In with no_grads()')
            segmentations = []
            start_time = time.time()
            for scale in imgs:
                imgs[scale]['image'] = imgs[scale]['image'].to(cfg.device)
                output = model(imgs[scale]['image'])[-1]

                segms = ctsegm_decode(*output,
                                      torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                                      K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                top_preds = {}
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(segms[:, 2 * j:2 * j + 2],
                                                                imgs[scale]['center'],
                                                                imgs[scale]['scale'],
                                                                (imgs[scale]['fmap_w'], imgs[scale]['fmap_h']))
                clses = segms[:, -1]
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.num_vertices * 2 + 1].astype(np.float32)
                    top_preds[j + 1][:, :cfg.num_vertices * 2] /= scale

                segmentations.append(top_preds)

            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, num_classes + 1)}  # a Dict label: segments
            scores = np.hstack([segms_and_scores[j][:, cfg.num_vertices * 2]
                                for j in range(1, num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.num_vertices * 2] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            # generate coco results for server eval
            # print('generate coco results for server eval ...')
            for lab in segms_and_scores:
                for res in segms_and_scores[lab]:
                    poly, score = res[:-1], res[-1]
                    recon_contour = poly.reshape((-1, 2))
                    recon_contour[:, 0] = np.clip(recon_contour[:, 0], 0, img_width - 1)
                    recon_contour[:, 1] = np.clip(recon_contour[:, 1], 0, img_height - 1)
                    category_id = int(COCO_IDS[lab - 1])

                    if score > cfg.detect_thres:
                        x1, y1, x2, y2 = int(min(recon_contour[:, 0])), int(min(recon_contour[:, 1])), \
                                         int(max(recon_contour[:, 0])), int(max(recon_contour[:, 1]))
                        bbox = [x1, y1, x2 - x1, y2 - y1]
                        det = {'image_id': int(img_id),
                               'category_id': int(category_id),
                               'score': float("{:.2f}".format(score)),
                               'bbox': bbox}
                        det_results.append(det)

                        # convert polygons to rle masks
                        poly = np.ndarray.flatten(recon_contour, order='C').tolist()  # row major flatten
                        rles = cocomask.frPyObjects([poly], img_height, img_width)
                        rle = cocomask.merge(rles)
                        m = cocomask.decode(rle)
                        rle_new = encode_mask(m.astype(np.uint8))

                        seg = {'image_id': int(img_id),
                               'category_id': int(category_id),
                               'score': float("{:.2f}".format(score)),
                               'segmentation': rle_new}
                        seg_results.append(seg)

    with open('{}/coco_result/{}_det_results_v{}.json'.format(
            cfg.root_dir, cfg.data_type, cfg.num_vertices), 'w') as f_det:
        json.dump(det_results, f_det)
    with open('{}/coco_result/{}_seg_results_v{}.json'.format(
            cfg.root_dir, cfg.data_type, cfg.num_vertices), 'w') as f_seg:
        json.dump(seg_results, f_seg)

    # run COCO detection evaluation
    print('Running COCO detection val17 evaluation ...')
    coco_pred = coco.loadRes('{}/coco_result/{}_det_results_v{}.json'.format(
        cfg.root_dir, cfg.data_type, cfg.num_vertices))
    imgIds = sorted(coco.getImgIds())
    coco_eval = COCOeval(coco, coco_pred, 'bbox')
    coco_eval.params.imgIds = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    print('---------------------------------------------------------------------------------')
    print('Running COCO segmentation val17 evaluation ...')
    coco_pred = coco.loadRes('{}/coco_result/{}_seg_results_v{}.json'.format(
        cfg.root_dir, cfg.data_type, cfg.num_vertices))
    coco_eval = COCOeval(coco, coco_pred, 'segm')
    coco_eval.params.imgIds = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
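# --- Illustrative note (not from the original scripts) ---
# The preprocessing in the mains above sizes the network input with
# `(n | padding) + 1`, where padding is 127 for hourglass and 31 for resdcn:
# OR-ing in the low bits and adding 1 rounds n up to the next multiple of
# 128 / 32 strictly greater than n, so the warped input always matches the
# backbone's stride with at least one pixel to spare.
for n in (100, 128, 200):
    print(n, (n | 31) + 1, (n | 127) + 1)
# 100 -> 128 / 128
# 128 -> 160 / 256
# 200 -> 224 / 256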
def val_map(epoch):
    print('\n Val@Epoch: %d' % epoch)
    model.eval()
    torch.cuda.empty_cache()
    max_per_image = 100

    results = {}
    input_scales = {}
    speed_list = []
    with torch.no_grad():
        for inputs in val_loader:
            img_id, inputs = inputs[0]
            start_image_time = time.time()

            segmentations = []
            for scale in inputs:
                inputs[scale]['image'] = inputs[scale]['image'].to(cfg.device)
                if scale == 1. and img_id not in input_scales.keys():  # keep track of the input image sizes
                    _, _, input_h, input_w = inputs[scale]['image'].shape
                    input_scales[img_id] = {'h': input_h, 'w': input_w}

                output = model(inputs[scale]['image'])[-1]
                segms = ctsegm_decode(*output,
                                      torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device),
                                      K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                top_preds = {}
                for j in range(cfg.n_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(segms[:, 2 * j:2 * j + 2],
                                                                inputs[scale]['center'],
                                                                inputs[scale]['scale'],
                                                                (inputs[scale]['fmap_w'], inputs[scale]['fmap_h']))
                clses = segms[:, -1]
                for j in range(val_dataset.num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :cfg.n_vertices * 2 + 1].astype(np.float32)
                    top_preds[j + 1][:, :cfg.n_vertices * 2] /= scale

                segmentations.append(top_preds)

            end_image_time = time.time()
            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in range(1, val_dataset.num_classes + 1)}
            scores = np.hstack([segms_and_scores[j][:, cfg.n_vertices * 2]
                                for j in range(1, val_dataset.num_classes + 1)])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, val_dataset.num_classes + 1):
                    keep_inds = (segms_and_scores[j][:, cfg.n_vertices * 2] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            results[img_id] = segms_and_scores
            # print(segms_and_scores)
            # exit()
            speed_list.append(end_image_time - start_image_time)

    eval_results = val_dataset.run_eval(results, input_scales, save_dir=cfg.ckpt_dir)
    print(eval_results)
    summary_writer.add_scalar('val_mAP/mAP', eval_results[0], epoch)
    print('Average speed on val set: {:.2f} FPS'.format(1. / np.mean(speed_list)))