def train():
    dataset = get_dataset()
    heads = {'hm': dataset.num_classes, 'gd': 2, 'reg': 2}
    net = get_pose_net(34, heads)

    if args.resume:
        # Strip the 'module.' prefix left behind by DataParallel checkpoints.
        missing, unexpected = net.load_state_dict(
            {k.replace('module.', ''): v
             for k, v in torch.load(args.resume, map_location='cpu').items()},
            strict=False)
        if missing:
            print('Missing:', missing)
        if unexpected:
            print('Unexpected:', unexpected)

    net.train()
    # net = nn.DataParallel(net.cuda(), device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
    torch.backends.cudnn.benchmark = True

    # optimizer = optim.Adam(net.parameters(), lr=args.lr)
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=1e-4)
    for param_group in optimizer.param_groups:
        param_group['initial_lr'] = args.lr
    adjust_learning_rate = optim.lr_scheduler.MultiStepLR(
        optimizer, [90, 120], 0.1, args.start_iter)
    # adjust_learning_rate = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, args.start_iter)

    # CtdetLoss wraps the network, so DataParallel replicates net + loss
    # together and each GPU returns its own loss.
    criterion = nn.DataParallel(CtdetLoss(net).cuda(),
                                device_ids=[0, 1, 2, 3, 4, 5, 6, 7])

    print('Loading the dataset...')
    print('Training CenterNet on:', dataset.name)
    print('Using the specified args:')
    print(args)

    data_loader = data.DataLoader(dataset(args.dataset_root, 'train'),
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)

    # create batch iterator
    for iteration in range(args.start_iter + 1, args.epochs + 1):
        loss = train_one_epoch(data_loader, net, criterion, optimizer, iteration)
        adjust_learning_rate.step()
        # Skip the resumed epoch itself; the modulus controls how often to
        # checkpoint (every epoch with % 1).
        if (iteration - args.start_iter) != 0 and iteration % 1 == 0:
            print('Saving state, iter:', iteration)
            # train_one_epoch is expected to return a string tag here, since
            # it is concatenated into the checkpoint filename.
            torch.save(net.state_dict(),
                       args.save_folder + 'instance_dla_' +
                       repr(iteration) + loss + '.pth')
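# The checkpoint name above concatenates `loss` with strings, so
# train_one_epoch is assumed to return a formatted string tag rather than a
# float. A minimal sketch matching that call site; the batch format from
# detection_collate, the criterion signature, and the '_%04d' tag are
# assumptions, not the repository's actual implementation.
def train_one_epoch_sketch(data_loader, net, criterion, optimizer, epoch):
    total, count = 0.0, 0
    for images, targets in data_loader:
        optimizer.zero_grad()
        # criterion is the DataParallel-wrapped loss, so it returns one loss
        # value per GPU; average them before the backward pass.
        loss = criterion(images.cuda(non_blocking=True), targets).mean()
        loss.backward()
        optimizer.step()
        total += loss.item()
        count += 1
    avg = total / max(count, 1)
    print('epoch %d | avg loss %.4f' % (epoch, avg))
    return '_%04d' % int(avg * 1000)  # string tag appended to the checkpoint name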
def inference():
    # load data
    Dataset = get_dataset()(args.voc_root, 'val')
    val_loader = data.DataLoader(Dataset, batch_size=1, shuffle=False,
                                 num_workers=1, pin_memory=True,
                                 collate_fn=val_collate)

    # load net
    heads = {'hm': Dataset.num_classes, 'wh': 2, 'reg': 2}
    net = get_pose_net(34, heads)
    net.load_state_dict({
        k.replace('module.', ''): v
        for k, v in torch.load(args.trained_model).items()
    })
    # load_model(net, 'ctdet_coco_dla_2x.pth')
    net.eval()
    net = nn.DataParallel(net.cuda(), device_ids=[0])
    print('Finished loading model!')
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # evaluation
    results = {}
    with torch.no_grad(), tqdm(total=len(val_loader)) as bar:
        for i in val_loader:
            preds = net(i['input'])
            output = preds[0]
            reg = output['reg']
            dets = ctdet_decode(output['hm'].sigmoid_(), output['wh'], reg=reg)
            dets = dets.detach().cpu().numpy()
            dets = dets.reshape(1, -1, dets.shape[2])
            # Map detections from output space back to the original image.
            dets = ctdet_post_process(dets.copy(),
                                      [i['meta'][0]['c']],
                                      [i['meta'][0]['s']],
                                      i['meta'][0]['out_height'],
                                      i['meta'][0]['out_width'], 80)
            for j in range(1, 80 + 1):
                dets[0][j] = np.array(dets[0][j],
                                      dtype=np.float32).reshape(-1, 5)
            results[int(i['meta'][0]['img_id'])] = merge_outputs([dets[0]])
            bar.update(1)
    Dataset.save_results(results, '.')
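# In the loop above, dets[0] maps each class id (1..80) to an (N, 5) array of
# [x1, y1, x2, y2, score] rows, and merge_outputs collapses a list of such
# dicts into the per-image result. A hedged sketch of what a single-scale
# merger of that shape typically does in CenterNet-style code; the
# max_per_image cap of 100 is an assumption.
import numpy as np

def merge_outputs_sketch(detections, num_classes=80, max_per_image=100):
    results = {j: np.concatenate([d[j] for d in detections],
                                 axis=0).astype(np.float32)
               for j in range(1, num_classes + 1)}
    scores = np.hstack([results[j][:, 4] for j in range(1, num_classes + 1)])
    if len(scores) > max_per_image:
        # Keep only the max_per_image highest-scoring boxes across all classes.
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, num_classes + 1):
            results[j] = results[j][results[j][:, 4] >= thresh]
    return results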
import torch

from nets import get_pose_net

# Smoke test: build the pose net with 20-class heads, print the architecture,
# and run a random 2-image batch through it.
heads = {'hm': 20, 'wh': 2 * 20, 'reg': 2}
net = get_pose_net(50, heads)
t = torch.randn(2, 3, 300, 300)
print(net)
net(t)
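# Continuing the smoke test above: print each head's output shape to confirm
# the channel counts and output stride. Elsewhere in this repo the network
# output is indexed both as a dict and as a one-element list (preds[0] in the
# inference code), so both cases are handled here as a hedge.
out = net(t)
out = out[0] if isinstance(out, (list, tuple)) else out
for name, tensor in out.items():
    print(name, tuple(tensor.shape))  # e.g. 'hm' with 20 channels at reduced resolution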
    # Tail of a CenterNet-style _topk helper (the lines computing topk_scores,
    # topk_inds, topk_ys and topk_xs are cut off; see the sketch below): keep
    # the K best scores over all classes and gather the matching indices,
    # classes, and coordinates.
    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    topk_clses = (topk_ind.true_divide(K)).int()
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1),
                             topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs


# model = '../backups/dla_instance_v4.0/dla_instance_139_1877.pth'
# model = '../backups/dla_instance_v5.0/dla_instance_104_4343.pth'
model = 'checkpoints/dla_instance_140_2615.pth'
imgpath = 'images/dogs_people.jpg'

net = get_pose_net(50, {'hm': 80, 'grad': 3}).cuda()
missing, unexpected = net.load_state_dict(torch.load(model))
net.eval()

img = bg = cv.imread(imgpath)
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
img = pre_process(img)

with torch.no_grad():
    output = net(img.cuda())
pred = output['hm'].sigmoid().cpu()
grad = output['grad'].cpu()
# m = output['mask'].cpu()

# Prepend a constant 0.1 background channel, then take the per-pixel argmax
# over (background + 80 classes).
pred = torch.argmax(
    torch.cat([torch.ones(1, 16, 16) * 0.1, pred.squeeze()], 0), 0)
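# Hedged reconstruction of the full _topk helper whose tail appears above, as
# it typically looks in CenterNet-style code: a per-class top-K on the heatmap
# followed by a global top-K across classes (uses the repo's _gather_feat).
def _topk_sketch(scores, K=100):
    # scores: (batch, num_classes, H, W) heatmap, already passed through sigmoid
    batch, cat, height, width = scores.size()

    # Top K peaks inside each class channel.
    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
    topk_inds = topk_inds % (height * width)
    topk_ys = (topk_inds.true_divide(width)).int().float()
    topk_xs = (topk_inds % width).int().float()

    # Top K over all classes; the flat index divided by K recovers the class.
    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    topk_clses = (topk_ind.true_divide(K)).int()
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1),
                             topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs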
def train():
    torch.backends.cudnn.benchmark = True

    _distributed = False
    if 'WORLD_SIZE' in os.environ:
        _distributed = int(os.environ['WORLD_SIZE']) > 1
    if _distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        N_gpu = torch.distributed.get_world_size()
    else:
        N_gpu = 1

    net = get_pose_net(50, {'hm': 80, 'grad': 3})
    if args.resume:
        missing, unexpected = net.load_state_dict(
            torch.load(args.resume, map_location='cpu'), strict=False)

    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
    #                       weight_decay=5e-4)
    optimizer = torch.optim.Adam(net.parameters(), args.lr)
    for param_group in optimizer.param_groups:
        param_group['initial_lr'] = args.lr
    adjust_learning_rate = optim.lr_scheduler.MultiStepLR(
        optimizer, [90, 120], 0.1, args.start_iter)
    # adjust_learning_rate = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, args.start_iter)

    if not args.local_rank:
        print('Loading the dataset....', end='')

    if _distributed:
        # Wrap network + loss together so each replica returns its own loss.
        getloss = nn.parallel.DistributedDataParallel(
            NetwithLoss(net).cuda(),
            device_ids=[args.local_rank],
            find_unused_parameters=True)
        external = panopticInputIterator(args.batch_size)
        pipe = panopticPipeline(external, DALIAugmentation(512),
                                args.batch_size, args.num_workers,
                                args.local_rank)
        data_loader = DALIGenericIterator(
            pipe, ["images", "anns", "gx", "gy", "x", "y", "s", "c1", "c2"],
            fill_last_batch=False, auto_reset=True,
            size=external.size // N_gpu + 1)
    else:
        getloss = nn.DataParallel(NetwithLoss(net).cuda(),
                                  device_ids=[0, 1, 2, 3, 4, 5, 6, 7])
        dataset = panopticDataset(Augmentation(512))
        # DistributedSampler requires an initialized process group, so the
        # single-process path falls back to plain shuffling.
        data_loader = data.DataLoader(dataset, args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=True, collate_fn=collate,
                                      pin_memory=True)

    if not args.local_rank:
        print('Finished!')
        print('Training CenterNet on:',
              'dali-panoptic no.%d' % args.local_rank)
        print('Using the specified args:')
        print(args)

    torch.cuda.empty_cache()
    # create batch iterator
    for iteration in range(args.start_iter + 1, args.epochs + 1):
        loss = train_one_epoch(data_loader, getloss, optimizer, iteration)
        if _distributed:
            # The DALI input iterator only exists on the distributed path.
            external.shuffle()
        adjust_learning_rate.step()
        if (iteration - args.start_iter) != 0:
            if _distributed:
                torch.distributed.barrier()
            if not args.local_rank:
                # train_one_epoch is expected to return a string tag used in
                # the checkpoint filename.
                torch.save(net.state_dict(),
                           args.save_folder + 'dla_instance_' +
                           '%03d' % iteration + loss + '.pth')
                print('Save model %03d' % iteration + loss + '.pth')
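# Both branches above hand a NetwithLoss wrapper to DDP / DataParallel, so the
# loss is computed on every replica and only scalar losses travel back to
# rank 0. A minimal sketch of that wrapper pattern; the forward signature and
# the way targets are passed are assumptions about the repo's actual NetwithLoss.
import torch.nn as nn

class NetWithLossSketch(nn.Module):
    def __init__(self, net, loss_fn):
        super().__init__()
        self.net = net
        self.loss_fn = loss_fn

    def forward(self, images, targets):
        outputs = self.net(images)
        # Some backbones return a one-element list of head dicts.
        outputs = outputs[0] if isinstance(outputs, (list, tuple)) else outputs
        return self.loss_fn(outputs, targets)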
    # Tail of the same _topk-style helper as above: keep the K best scores over
    # all classes and gather the matching indices, classes, and coordinates.
    topk_xs = (topk_inds % width).int().float()
    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    topk_clses = (topk_ind.true_divide(K)).int()
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1),
                             topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs


model = 'checkpoints/dla_instance_038_9234.pth'
imgpath = 'images/iceland_sheep.jpg'

net = get_pose_net(34, {'hm': 80, 'grad': 2, 'mask': 1}).cuda()
missing, unexpected = net.load_state_dict(torch.load(model))
net.eval()

img = bg = cv.imread(imgpath)
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
img = pre_process(img)

with torch.no_grad():
    output = net(img.cuda())
pred = output['hm'].sigmoid()
grad = output['grad']
mask = output['mask']

# Zero out heatmap scores wherever the predicted foreground mask is below 0.5.
m = mask.sigmoid().ge(0.5).type_as(pred)
pred = pred * m.expand_as(pred)
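# pre_process is not shown in these scripts; from its call sites it takes an
# RGB uint8 image and returns a 4-D float tensor ready for net(img.cuda()).
# A hedged sketch of what such a helper usually looks like in CenterNet-style
# demos; the 512x512 resolution and the mean/std values are assumptions.
import cv2 as cv
import numpy as np
import torch

def pre_process_sketch(img, size=512,
                       mean=(0.408, 0.447, 0.470),
                       std=(0.289, 0.274, 0.278)):
    img = cv.resize(img, (size, size)).astype(np.float32) / 255.0
    img = (img - np.array(mean, np.float32)) / np.array(std, np.float32)
    img = img.transpose(2, 0, 1)               # HWC -> CHW
    return torch.from_numpy(img).unsqueeze(0)  # (1, 3, size, size)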