def main():
    """Run multi-person pose estimation on every image in ``opts.img_dir``
    and write copies with drawn limbs to ``pose_results/``.
    """
    # Local imports keep heavy training deps out of module import time.
    # Fix: also import ``options`` here — it was used but never imported
    # (the eval entry point imports it the same way).
    from main import options, Model_Checkpoints
    from test import test_func
    from models.posenet import PoseNet

    opts = options()
    mode = opts.mode

    model = PoseNet(nstack=opts.nstack, inp_dim=opts.inp_dim, oup_dim=opts.oup_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr)
    epoch = Model_Checkpoints(opts).load_checkpoints(model, optimizer)
    print("Use the model which is trained by {} epoches".format(epoch))

    def runner(imgs):
        # Forward pass; only the keypoint predictions are needed here.
        return test_func(model, imgs=torch.Tensor(np.float32(imgs)))['preds']

    def do(img):
        # Detect all persons in ``img`` and attach a float confidence
        # score to each prediction dict.
        ans, scores = multiperson(img, runner, mode)
        if len(ans) > 0:
            ans = ans[:, :, :3]
        pred = genDtByPred(ans)
        for i, score in zip(pred, scores):
            i['score'] = float(score)
        return pred

    img_dir = opts.img_dir
    if img_dir:
        for img_name in os.listdir(img_dir):
            # Fix: os.path.join instead of fragile ``img_dir + img_name``
            # string concatenation (broke when img_dir lacked a slash).
            # cv2 loads BGR; reverse the channel axis to get RGB.
            img = cv2.imread(os.path.join(img_dir, img_name))[:, :, ::-1]
            cv2.imwrite('pose_results/' + img_name, img[:, :, ::-1])
            preds = do(img)
            for i in preds:
                keypoints = i['keypoints']
                img = imread('pose_results/' + img_name, mode='RGB')
                draw_limbs(img, keypoints)
                cv2.imwrite('pose_results/' + img_name, img[:, :, ::-1])
            print("{} has been estimated".format(img_name))
def main():
    """Evaluate a trained PoseNet with COCO-style keypoint metrics.

    Loads the checkpoint chosen by ``opts.continue_exp``, runs
    multi-person inference over every image from ``get_img`` and feeds
    the detections to ``coco_eval``.
    """
    from main import options, Model_Checkpoints
    # Fix: ``test_func`` was used below but never imported; import it
    # locally, matching the inference entry point.
    from test import test_func
    from models.posenet import PoseNet

    opts = options()
    mode = opts.mode

    model = PoseNet(nstack=opts.nstack, inp_dim=opts.inp_dim, oup_dim=opts.oup_dim)
    print(">>> total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr)
    epoch = Model_Checkpoints(opts).load_checkpoints(model, optimizer)
    print("Use the model which is trained by {} epoches".format(epoch))
    if opts.continue_exp is None:
        print("Warning: you must choose a trained model")

    def runner(imgs):
        # Forward pass; only the keypoint predictions are needed here.
        return test_func(model, imgs=torch.Tensor(np.float32(imgs)))['preds']

    def do(image_id, img):
        # One image -> list of per-person prediction dicts with scores.
        ans, scores = multiperson(img, runner, mode)
        if len(ans) > 0:
            ans = ans[:, :, :3]
        pred = genDtByPred(ans, image_id)
        for i, score in zip(pred, scores):
            i['score'] = float(score)
        return pred

    gts = []
    preds = []
    # inp_res=-1: let the loader keep the images at native resolution.
    # (presumably — TODO confirm against get_img's implementation)
    for image_id, img in get_img(inp_res=-1):
        preds.append(do(image_id, img))

    prefix = os.path.join('checkpoint', opts.continue_exp)
    coco_eval(prefix, preds, gts)
def main():
    """Train a PoseNet camera-pose regressor on the Apolloscape dataset.

    Builds train/val dataloaders, optionally restores a checkpoint,
    trains for ``args.epochs`` epochs with periodic validation, figure
    and checkpoint saving, then reports translation/rotation errors on
    both splits and saves a final checkpoint.
    """
    args = get_args()
    print('----- Params for debug: ----------------')
    print(args)
    print('data = {}'.format(args.data))
    print('road = {}'.format(args.road))
    print('Train model ...')

    # ImageNet normalization in case of a pre-trained backbone.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Resize data before using
    transform = transforms.Compose([
        transforms.Resize(260),
        transforms.CenterCrop(250),
        transforms.ToTensor(),
        normalize
    ])

    train_record = None  # 'Record001'
    train_dataset = Apolloscape(root=args.data, road=args.road,
                                transform=transform, record=train_record,
                                normalize_poses=True, pose_format='quat',
                                train=True,
                                cache_transform=not args.no_cache_transform,
                                stereo=args.stereo)
    val_record = None  # 'Record011'
    val_dataset = Apolloscape(root=args.data, road=args.road,
                              transform=transform, record=val_record,
                              normalize_poses=True, pose_format='quat',
                              train=False,
                              cache_transform=not args.no_cache_transform,
                              stereo=args.stereo)

    # Show datasets
    print(train_dataset)
    print(val_dataset)

    shuffle_data = True
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  shuffle=shuffle_data)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size,
                                shuffle=shuffle_data)

    # Pose normalization statistics (taken from the validation split,
    # matching the original behaviour).
    poses_mean = val_dataset.poses_mean
    poses_std = val_dataset.poses_std

    # Select active device
    if torch.cuda.is_available() and args.device == 'cuda':
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print('device = {}'.format(device))

    # Used as prefix for checkpoint/figure filenames.
    time_str = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Create pretrained feature extractor backbone.
    if args.feature_net == 'resnet18':
        feature_extractor = models.resnet18(pretrained=args.pretrained)
    elif args.feature_net == 'resnet34':
        feature_extractor = models.resnet34(pretrained=args.pretrained)
    elif args.feature_net == 'resnet50':
        feature_extractor = models.resnet50(pretrained=args.pretrained)
    else:
        # Fix: fail fast with a clear message instead of a NameError on
        # the next line when an unknown backbone name is passed.
        raise ValueError('Unknown feature_net: {}'.format(args.feature_net))

    # Num features for the last layer before the pose regressor.
    num_features = args.feature_net_features  # 2048

    experiment_name = get_experiment_name(args)

    # Create model
    model = PoseNet(feature_extractor, num_features=num_features)
    model = model.to(device)

    # Criterion
    criterion = PoseNetCriterion(stereo=args.stereo, beta=args.beta,
                                 learn_beta=args.learn_beta)
    criterion.to(device)

    # Optimize the model and, when learn_beta is set, the loss weights.
    param_list = [{'params': model.parameters()}]
    if criterion.learn_beta:
        param_list.append({'params': criterion.parameters()})
    optimizer = optim.Adam(params=param_list, lr=args.lr, weight_decay=0.0005)

    start_epoch = 0
    # Restore from checkpoint if present.
    if args.checkpoint is not None:
        checkpoint_file = args.checkpoint
        if os.path.isfile(checkpoint_file):
            print('\nLoading from checkpoint: {}'.format(checkpoint_file))
            checkpoint = torch.load(checkpoint_file)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optim_state_dict'])
            start_epoch = checkpoint['epoch']
            if 'criterion_state_dict' in checkpoint:
                criterion.load_state_dict(checkpoint['criterion_state_dict'])
                print('Loaded criterion params too.')

    n_epochs = start_epoch + args.epochs
    print('\nTraining ...')
    val_freq = args.val_freq
    for e in range(start_epoch, n_epochs):
        # Train for one epoch
        train(train_dataloader, model, criterion, optimizer, e, n_epochs,
              log_freq=args.log_freq,
              poses_mean=train_dataset.poses_mean,
              poses_std=train_dataset.poses_std,
              device=device, stereo=args.stereo)

        # Run validation loop (unused ``end = time.time()`` removed).
        if e > 0 and e % val_freq == 0:
            validate(val_dataloader, model, criterion, e,
                     log_freq=args.log_freq, device=device,
                     stereo=args.stereo)

        # Make figure
        if e > 0 and args.fig_save > 0 and e % args.fig_save == 0:
            exp_name = '{}_{}'.format(time_str, experiment_name)
            make_figure(model, train_dataloader, poses_mean=poses_mean,
                        poses_std=poses_std, epoch=e,
                        experiment_name=exp_name, device=device,
                        stereo=args.stereo)

        # Make checkpoint
        if e > 0 and e % args.checkpoint_save == 0:
            make_checkpoint(model, optimizer, criterion, epoch=e,
                            time_str=time_str, args=args)

    print('\nn_epochs = {}'.format(n_epochs))

    def _evaluate(dataloader, title):
        # Report median/mean translation and rotation (quaternion
        # angular) errors for one split; returns predictions and ground
        # truth for later visualization. Factored out of two previously
        # duplicated copies of the same code.
        print('\n=== {} ======'.format(title))
        pred_poses, gt_poses = model_results_pred_gt(
            model, dataloader, poses_mean, poses_std,
            device=device, stereo=args.stereo)
        print('gt_poses = {}'.format(gt_poses.shape))
        print('pred_poses = {}'.format(pred_poses.shape))
        # First 3 components are translation, the rest the quaternion.
        t_loss = np.asarray([
            np.linalg.norm(p - t)
            for p, t in zip(pred_poses[:, :3], gt_poses[:, :3])
        ])
        q_loss = np.asarray([
            quaternion_angular_error(p, t)
            for p, t in zip(pred_poses[:, 3:], gt_poses[:, 3:])
        ])
        print('poses_std = {:.3f}'.format(np.linalg.norm(poses_std)))
        print('T: median = {:.3f}, mean = {:.3f}'.format(
            np.median(t_loss), np.mean(t_loss)))
        print('R: median = {:.3f}, mean = {:.3f}'.format(
            np.median(q_loss), np.mean(q_loss)))
        return pred_poses, gt_poses

    # Save for later visualization
    pred_poses_train, gt_poses_train = _evaluate(train_dataloader,
                                                 'Test Training Dataset')
    pred_poses_val, gt_poses_val = _evaluate(val_dataloader,
                                             'Test Validation Dataset')

    # Save checkpoint
    print('\nSaving model params ....')
    make_checkpoint(model, optimizer, criterion, epoch=n_epochs,
                    time_str=time_str, args=args)
def main():
    """Train PoseNet with alternating train/valid phases per epoch.

    Logs losses to TensorBoard, saves a checkpoint after every epoch and
    a numbered snapshot every 50 epochs; can resume from the checkpoint
    of ``opts.continue_exp``.
    """
    cudnn.benchmark = True
    cudnn.enabled = True
    opts = options()
    continue_exp = opts.continue_exp

    model = PoseNet(nstack=opts.nstack, inp_dim=opts.inp_dim,
                    oup_dim=opts.oup_dim, masks_flag=opts.masks_flag)
    print(">>> total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr)

    ##train datas and valid datas loader generator##
    data_load_func = dataload.init(opts)
    save_options(opts, os.path.join('log/train_option/' + opts.exp),
                 model.__str__(), optimizer.__str__())

    begin_epoch = 0
    total_epochs = opts.total_epochs
    # Resume the specified experiment from its last checkpoint, if any.
    if continue_exp:
        begin_epoch = Model_Checkpoints(opts).load_checkpoints(model, optimizer)
        print('Start training # epoch{}'.format(begin_epoch))

    for epoch in range(begin_epoch, total_epochs):
        print('-------------Training Epoch {}-------------'.format(epoch))
        #lr = adjust_lr(optimizer, epoch)
        # Training and validation phases share the same iteration loop.
        for phase in ['train', 'valid']:
            num_step = opts.train_iters if phase == 'train' else opts.valid_iters
            generator = data_load_func(phase)
            print('start', phase)
            show_range = tqdm.tqdm(range(num_step), total=num_step, ascii=True)
            for i in show_range:
                datas = next(generator)
                loss = train_func(opts, model, optimizer, phase, **datas)
                niter = epoch * num_step + i
                # Fix: loss.item() replaces the deprecated loss.data[0],
                # which raises on 0-dim tensors with PyTorch >= 0.5.
                if phase == 'train':
                    if i % 20 == 0:
                        writer.add_scalar('{}/Loss'.format(phase),
                                          loss.item(), niter)
                else:
                    # Fix: the original logged every valid loss at a stale
                    # ``niter`` carried over from the train phase; use the
                    # current step index instead.
                    writer.add_scalar('{}/Loss'.format(phase),
                                      loss.item(), niter)

        Model_Checkpoints(opts).save_checkpoints({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        })
        if epoch % 50 == 0 and epoch != 0:
            Model_Checkpoints(opts).save_checkpoints(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                filename='{}_checkpoint.pth.tar'.format(epoch))
def main(args):
    """Train a PoseNet image classifier with OneFlow.

    Runs SGD training over OFRecord data, evaluates top-1 accuracy on
    the validation split after every epoch, saves a checkpoint named
    after the accuracy, and dumps the sampled losses to of_losses.txt.
    """
    flow.enable_eager_execution()

    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        # NOTE(Liang Depeng): needs to explictly set the dataset size
        dataset_size=7459,
        batch_size=args.train_batch_size,
    )
    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=1990,
        batch_size=args.val_batch_size,
    )

    # oneflow init
    start_t = time.time()
    posenet_module = PoseNet()
    if args.load_checkpoint != "":
        posenet_module.load_state_dict(flow.load(args.load_checkpoint))
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    of_cross_entropy = flow.nn.CrossEntropyLoss()
    posenet_module.to("cuda")
    of_cross_entropy.to("cuda")
    of_sgd = flow.optim.SGD(posenet_module.parameters(),
                            lr=args.learning_rate, momentum=args.mom)

    of_losses = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 100

    for epoch in range(args.epochs):
        posenet_module.train()
        for b in range(len(train_data_loader)):
            image, label = train_data_loader.get_batch()
            # oneflow train step
            start_t = time.time()
            image = image.to("cuda")
            label = label.to("cuda")
            logits = posenet_module(image)
            loss = of_cross_entropy(logits, label)
            loss.backward()
            of_sgd.step()
            of_sgd.zero_grad()
            end_t = time.time()
            if b % print_interval == 0:
                l = loss.numpy()
                of_losses.append(l)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".
                    format(epoch, b, l, end_t - start_t))

        print("epoch %d train done, start validation" % epoch)
        posenet_module.eval()
        correct_of = 0.0
        for b in range(len(val_data_loader)):
            image, label = val_data_loader.get_batch()
            start_t = time.time()
            image = image.to("cuda")
            with flow.no_grad():
                logits = posenet_module(image)
                predictions = logits.softmax()
            of_predictions = predictions.numpy()
            clsidxs = np.argmax(of_predictions, axis=1)
            label_nd = label.numpy()
            for i in range(args.val_batch_size):
                if clsidxs[i] == label_nd[i]:
                    correct_of += 1
            end_t = time.time()

        print("epoch %d, oneflow top1 val acc: %f" %
              (epoch, correct_of / all_samples))
        flow.save(
            posenet_module.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, correct_of / all_samples),
            ),
        )

    # Fix: use a context manager so the loss log is closed (and flushed)
    # even if a write fails; the original used bare open()/close().
    with open("of_losses.txt", "w") as loss_file:
        for o in of_losses:
            loss_file.write("%f\n" % o)
# t = data_dict[0].cuda(non_blocking=True) # , non_blocking=True # count += opt.batch_size # print(bath_id, ' of ', epoch) # if count > 500: # break # print('**************** ', count / (time.time() - t0)) use_cuda = torch.cuda.is_available() # 判断GPU cuda是否可用 best_loss = float('inf') start_epoch = 0 # 从0开始或者从上一个epoch开始 posenet = PoseNet(opt.nstack, opt.hourglass_inp_dim, config.num_layers, bn=False) optimizer = optim.SGD(posenet.parameters(), lr=opt.learning_rate, momentum=0.9, weight_decay=1e-4) if args.resume: print('\nResuming from checkpoint ...... ') checkpoint = torch.load( opt.ckpt_path, map_location=torch.device('cpu')) # map to cpu to save the gpu memory posenet.load_state_dict(checkpoint['weights']) print('\nNetwork weights have been resumed from checkpoint...') optimizer.load_state_dict(checkpoint['optimizer_weight']) # We must convert the resumed state data of optimizer to gpu """It is because the previous training was done on gpu, so when saving the optimizer.state_dict, the stored
# for epoch in range(20): # for bath_id, data_dict in enumerate(train_loader): # # t = data_dict[0].cuda(non_blocking=True) # , non_blocking=True # count += opt.batch_size # print(bath_id, ' of ', epoch) # if count > 500: # break # print('**************** ', count / (time.time() - t0)) use_cuda = torch.cuda.is_available() # 判断GPU cuda是否可用 best_loss = float('inf') start_epoch = 0 # 从0开始或者从上一个epoch开始 posenet = PoseNet(opt.nstack, opt.hourglass_inp_dim, config.num_layers, bn=False) optimizer = optim.SGD(posenet.parameters(), lr=opt.learning_rate, momentum=0.9, weight_decay=1e-4) if args.resume: print('\nResuming from checkpoint ...... ') checkpoint = torch.load(opt.ckpt_path, map_location=torch.device('cpu')) # map to cpu to save the gpu memory posenet.load_state_dict(checkpoint['weights']) print('\nNetwork weights have been resumed from checkpoint...') optimizer.load_state_dict(checkpoint['optimizer_weight']) # We must convert the resumed state data of optimizer to gpu """It is because the previous training was done on gpu, so when saving the optimizer.state_dict, the stored states(tensors) are of cuda version. During resuming, when we load the saved optimizer, load_state_dict() loads this cuda version to cpu. But in this project, we use map_location to map the state tensors to cpu. In the training process, we need cuda version of state tensors, so we have to convert them to gpu.""" for state in optimizer.state.values(): for k, v in state.items():