def load_single_pose_model(model_name, **kwargs):
    base_dir = kwargs.pop("base_dir", None)
    if not base_dir:
        base_dir = BASE_DIR
    device = kwargs.pop("device", "cpu")
    download(model_name, base_dir)
    model = SinglePersonPoseEstimationWithMobileNet(**WEIGHTS[model_name][1], **kwargs)
    checkpoint = torch.load(os.path.join(base_dir, model_name),
                            map_location=torch.device(device))
    load_state(model, checkpoint)
    return model
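# Usage sketch for load_single_pose_model. It assumes the module-level WEIGHTS
# registry, BASE_DIR, and download() helper defined alongside this function;
# the key "single_pose_mobilenet" below is a hypothetical example entry.
def _demo_load_model():
    model = load_single_pose_model("single_pose_mobilenet", device="cpu")
    model.eval()  # weights are loaded; switch to inference mode
    return model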
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment_name', type=str, default='test',
                        help='name of output file with detected keypoints')
    parser.add_argument('--multiscale', action='store_true',
                        help='average inference results over multiple scales')
    parser.add_argument('--visualize', action='store_true',
                        help='show keypoints')
    parser.add_argument('--get_feature', action='store_true',
                        help='also extract feature maps during inference')
    # argparse's type=bool treats any non-empty string as True, so parse
    # the value explicitly.
    parser.add_argument('--dataset_mode', type=lambda s: s.lower() in ('true', '1'),
                        default=True,
                        help='generate keypoint-map dataset for the VAE')
    parser.add_argument('--save_maps', action='store_true',
                        help='save predicted heatmaps')
    parser.add_argument('--checkpoint-path', type=str,
                        default="checkpoints/checkpoint_anime_47.pth",
                        help='path to the checkpoint')
    parser.add_argument('--dataset_folder', type=str, default="./data_anime",
                        help='path to dataset folder')
    parser.add_argument('--num_kps', type=int, default=21,  # 16 for real, 21 for anime
                        help='number of key points')
    # Settings for the real (LIP) dataset:
    # parser.add_argument('--checkpoint-path', type=str,
    #                     default="checkpoints/checkpoint_real.pth",
    #                     help='path to the checkpoint')
    # parser.add_argument('--dataset_folder', type=str, default="./data_lip",
    #                     help='path to dataset folder')
    # parser.add_argument('--num_kps', type=int, default=16,
    #                     help='number of key points')
    args = parser.parse_args()

    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages=5,
                                                  num_heatmaps=args.num_kps + 1)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)

    data_flag = "real" if args.dataset_folder.split("/")[-1] == "data_lip" else "anime"
    date = time.strftime("%m%d-%H%M%S")
    results_folder = 'test_results/{}{}_test'.format(args.experiment_name, date)
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)

    # Raw strings avoid invalid escape sequences in Windows paths.
    ori_dataFolder = r"D:\download_cache\VAEmodel\OriFrame"
    map_dataFolder = r"D:\download_cache\anime_data2\train"
    if data_flag == "real":
        dataset = LipTestDataset(ori_dataFolder)
    else:
        dataset = AnimeTestDataset(map_dataFolder)
    # TODO: the datasets have been modified; a shadow-like image is still needed.
    evaluate(dataset, results_folder, net, args.multiscale, args.visualize,
             args.save_maps, num_kps=args.num_kps, get_feature=args.get_feature,
             dataset_mode=args.dataset_mode)
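# Example invocations of the test entry point above. The script name test.py is
# an assumption; checkpoint and dataset paths are placeholders. Note that
# --num_kps must match the checkpoint: 16 for the real (LIP) data, 21 for anime.
#   python test.py --checkpoint-path checkpoints/checkpoint_anime_47.pth \
#       --dataset_folder ./data_anime --num_kps 21 --save_maps
#   python test.py --checkpoint-path checkpoints/checkpoint_real.pth \
#       --dataset_folder ./data_lip --num_kps 16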
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after,
          num_kps, finetune=False):
    net = SinglePersonPoseEstimationWithMobileNet(
        num_refinement_stages=num_refinement_stages,
        num_heatmaps=num_kps + 1).cuda()
    stride = 8
    sigma = 7
    # num_kps defaults to 16; +1 background channel = 17 heatmaps.
    # The input image size is arbitrary; flipping may not be needed.
    data_flag = "real" if images_folder.split("/")[-1] == "data_lip" else "anime"
    train_log = get_logger(checkpoints_folder, cmd_stream=True)
    if data_flag == "real":
        dataset = LipTrainDataset(images_folder, stride, sigma,
                                  transform=transforms.Compose([
                                      SinglePersonBodyMasking(),
                                      ChannelPermutation(),
                                      SinglePersonRotate(pad=(128, 128, 128),
                                                         max_rotate_degree=40),
                                      SinglePersonCropPad(pad=(128, 128, 128),
                                                          crop_x=256, crop_y=256),
                                      SinglePersonFlip()
                                  ]))
    else:
        dataset = AnimeTrainDataset(images_folder, stride, sigma,
                                    transform=transforms.Compose([
                                        SinglePersonBodyMasking(),
                                        ChannelPermutation(),
                                        SinglePersonRotate(pad=(128, 128, 128),
                                                           max_rotate_degree=40),
                                        SinglePersonCropPad(pad=(128, 128, 128),
                                                            crop_x=256, crop_y=256)
                                    ]))
    # batch_size defaults to 32.
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)

    # Per-module parameter groups: biases get double the learning rate,
    # depthwise convolutions and batch-norm parameters skip weight decay.
    backbone_p = [
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    cpm_p = [
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
    ]
    initial_p = [
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    refine_p = [
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    opt_p = []
    # TODO: when the model structure changes, update the parameter groups above.
    if not finetune:
        opt_p += backbone_p
    opt_p += cpm_p
    opt_p += initial_p
    opt_p += refine_p
    optimizer = optim.Adam(opt_p, lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                     patience=5, threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                # Round to a multiple of log_after so the loss printed after
                # resuming is averaged over a full logging window.
                num_iter = num_iter // log_after * log_after
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net, device_ids=[0])
    net.train()
    for epochId in range(current_epoch, 100):
        train_log.debug('Epoch: {}'.format(epochId))
        net.train()
        total_losses = [0] * (num_refinement_stages + 1)  # heatmap loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()
            images = batch_data['image'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            stages_output = net(images)

            losses = []
            # One loss per output: the initial stage plus each refinement stage.
            for loss_idx in range(len(total_losses)):
                losses.append(l2_loss(stages_output[loss_idx], keypoint_maps,
                                      images.shape[0]))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            # Log every log_after optimizer steps.
            if num_iter % log_after == 0:
                train_log.debug('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses)):
                    train_log.debug('stage{}_heatmaps_loss: {}'.format(
                        loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        snapshot_name = '{}/checkpoint_last_epoch.pth'.format(checkpoints_folder)
        torch.save({'state_dict': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'iter': num_iter,
                    'current_epoch': epochId}, snapshot_name)
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = '{}/checkpoint_epoch_{}.pth'.format(checkpoints_folder,
                                                                epochId)
            torch.save({'state_dict': net.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'iter': num_iter,
                        'current_epoch': epochId}, snapshot_name)

        train_log.debug('Validation...')
        net.eval()
        eval_num = 1000
        if data_flag == "real":
            val_dataset = LipValDataset(images_folder, eval_num)
        else:
            val_dataset = AnimeValDataset(images_folder, eval_num)
        predictions_name = '{}/val_results.csv'.format(checkpoints_folder)
        evaluate(val_dataset, predictions_name, net, num_kps=num_kps)
        pck = calc_pckh(val_dataset.labels_file_path, predictions_name,
                        eval_num=eval_num)
        val_loss = 100 - pck[-1][-1]  # 100 - average PCKh
        train_log.debug('Val loss: {}'.format(val_loss))
        scheduler.step(val_loss, epochId)
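# The snapshots written above store optimizer and scheduler state next to the
# weights, so training can resume exactly. A minimal sketch of reading one back
# for inference only (the path and num_heatmaps=17, i.e. 16 keypoints + 1
# background, are assumptions for the real-data setting):
def _demo_resume(snapshot_path="checkpoints/checkpoint_last_epoch.pth"):
    checkpoint = torch.load(snapshot_path, map_location="cpu")
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages=5,
                                                  num_heatmaps=17)
    # 'state_dict' was saved via net.module.state_dict(), so it loads directly.
    net.load_state_dict(checkpoint['state_dict'])
    print("resuming from iter", checkpoint['iter'],
          "epoch", checkpoint['current_epoch'])
    return net.eval()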
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after):
    dataset = CocoSingleTrainDataset(
        images_folder,
        transform=transforms.Compose([
            HalfBodyTransform(),
            RandomScaleRotate(),
            SinglePersonFlip(
                left_keypoints_indice=CocoSingleTrainDataset.left_keypoints_indice,
                right_keypoints_indice=CocoSingleTrainDataset.right_keypoints_indice),
            SinglePersonRandomAffineTransform(),
            SinglePersonBodyMasking(),
            Normalization(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225]),
            ChannelPermutation()
        ]))
    net = SinglePersonPoseEstimationWithMobileNet(
        num_refinement_stages,
        num_heatmaps=dataset._num_keypoints,
        mode='nearest').cuda()
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)

    optimizer = optim.Adam(net.parameters(), lr=base_lr)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [170, 200], 0.1)
    num_iter = 0
    current_epoch = 0
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net)
    net.train()
    for epochId in range(current_epoch, 210):
        print('Epoch: {}'.format(epochId))
        net.train()
        total_losses = [0] * (num_refinement_stages + 1)  # heatmap loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()
            images = batch_data['image'].float().cuda()
            keypoint_maps = batch_data['keypoint_maps']
            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses)):
                # Keypoint visibilities (every third value) weight the loss.
                losses.append(mse_loss(
                    stages_output[loss_idx], keypoint_maps,
                    batch_data['keypoints'][:, 2::3].view(
                        batch_data['keypoints'].shape[0], -1, 1)))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = 0
            for loss_idx in range(len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses)):
                    print('stage{}_heatmaps_loss: {}'.format(
                        loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        snapshot_name = '{}/checkpoint_last_epoch.pth'.format(checkpoints_folder)
        torch.save({'state_dict': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'iter': num_iter,
                    'current_epoch': epochId}, snapshot_name)
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = '{}/checkpoint_epoch_{}.pth'.format(checkpoints_folder,
                                                                epochId)
            torch.save({'state_dict': net.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'iter': num_iter,
                        'current_epoch': epochId}, snapshot_name)

        print('Validation...')
        net.eval()
        val_dataset = CocoSingleValDataset(
            images_folder,
            transform=transforms.Compose([
                SinglePersonRandomAffineTransform(mode='val'),
                Normalization(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
            ]))
        predictions_name = '{}/val_results2.json'.format(checkpoints_folder)
        val_loss = val(net, val_dataset, predictions_name, 'CocoSingle')
        print('Val loss: {}'.format(val_loss))
        scheduler.step()
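# Both training loops above accumulate gradients over batches_per_iter batches
# before each optimizer step, emulating an effective batch size of
# batch_size * batches_per_iter. A self-contained sketch of the same pattern
# (net, loader, optimizer, criterion are generic placeholders):
def _demo_grad_accumulation(net, loader, optimizer, criterion, batches_per_iter=2):
    batch_per_iter_idx = 0
    for images, targets in loader:
        if batch_per_iter_idx == 0:
            optimizer.zero_grad()
        # Scale the loss so the accumulated gradient matches one large batch.
        loss = criterion(net(images), targets) / batches_per_iter
        loss.backward()
        batch_per_iter_idx += 1
        if batch_per_iter_idx == batches_per_iter:
            optimizer.step()
            batch_per_iter_idx = 0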
    parser.add_argument('--name-dataset', type=str, required=True,
                        choices=['CocoSingle', 'Lip'],
                        help='name of dataset for validation: <Lip> or <CocoSingle>')
    args = parser.parse_args()

    if args.name_dataset == 'CocoSingle':
        val_dataset = CocoSingleValDataset(
            args.dataset_folder,
            transform=transforms.Compose([
                SinglePersonRandomAffineTransform(mode='val'),
                Normalization(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
            ]))
        num_heatmaps = val_dataset._num_keypoints
    elif args.name_dataset == "Lip":
        val_dataset = LipValDataset(args.dataset_folder)
        num_heatmaps = val_dataset._num_keypoints + 1  # extra background channel
    else:
        raise RuntimeError("Unknown dataset.")

    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages=5,
                                                  num_heatmaps=num_heatmaps)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)
    val_loss = val(net, val_dataset, args.output_name, args.name_dataset)
    print('Val loss: {}'.format(val_loss))
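# Example invocation of the validation entry point above. The script name
# val.py is an assumption, and the --dataset-folder / --output-name flag names
# are inferred from args.dataset_folder and args.output_name in the body (their
# definitions are truncated above); the checkpoint path is a placeholder.
#   python val.py --name-dataset Lip --dataset-folder ./data_lip \
#       --checkpoint-path checkpoints/checkpoint_real.pth --output-name val.csv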
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after):
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages).cuda()
    stride = 8
    sigma = 7
    dataset = LipTrainDataset(
        images_folder, stride, sigma,
        transform=transforms.Compose([
            SinglePersonBodyMasking(),
            ChannelPermutation(),
            SinglePersonRotate(pad=(128, 128, 128), max_rotate_degree=40),
            SinglePersonCropPad(pad=(128, 128, 128), crop_x=256, crop_y=256),
            SinglePersonFlip(
                left_keypoints_indice=LipTrainDataset.left_keypoints_indice,
                right_keypoints_indice=LipTrainDataset.right_keypoints_indice),
        ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)
    optimizer = optim.Adam(
        [
            {"params": get_parameters_conv(net.model, "weight")},
            {"params": get_parameters_conv_depthwise(net.model, "weight"),
             "weight_decay": 0},
            {"params": get_parameters_bn(net.model, "weight"), "weight_decay": 0},
            {"params": get_parameters_bn(net.model, "bias"),
             "lr": base_lr * 2, "weight_decay": 0},
            {"params": get_parameters_conv(net.cpm, "weight"), "lr": base_lr},
            {"params": get_parameters_conv(net.cpm, "bias"),
             "lr": base_lr * 2, "weight_decay": 0},
            {"params": get_parameters_conv_depthwise(net.cpm, "weight"),
             "weight_decay": 0},
            {"params": get_parameters_conv(net.initial_stage, "weight"),
             "lr": base_lr},
            {"params": get_parameters_conv(net.initial_stage, "bias"),
             "lr": base_lr * 2, "weight_decay": 0},
            {"params": get_parameters_bn(net.initial_stage, "weight"),
             "weight_decay": 0},
            {"params": get_parameters_bn(net.initial_stage, "bias"),
             "lr": base_lr * 2, "weight_decay": 0},
            {"params": get_parameters_conv(net.refinement_stages, "weight"),
             "lr": base_lr * 4},
            {"params": get_parameters_conv(net.refinement_stages, "bias"),
             "lr": base_lr * 8, "weight_decay": 0},
            {"params": get_parameters_bn(net.refinement_stages, "weight"),
             "weight_decay": 0},
            {"params": get_parameters_bn(net.refinement_stages, "bias"),
             "lr": base_lr * 2, "weight_decay": 0},
        ],
        lr=base_lr,
        weight_decay=5e-4,
    )

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                     patience=5, threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint["optimizer"])
                scheduler.load_state_dict(checkpoint["scheduler"])
                num_iter = checkpoint["iter"]
                current_epoch = checkpoint["current_epoch"] + 1

    net = DataParallel(net)
    net.train()
    for epochId in range(current_epoch, 100):
        print("Epoch: {}".format(epochId))
        net.train()
        total_losses = [0] * (num_refinement_stages + 1)  # heatmap loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()
            images = batch_data["image"].cuda()
            keypoint_maps = batch_data["keypoint_maps"].cuda()
            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses)):
                losses.append(l2_loss(stages_output[loss_idx], keypoint_maps,
                                      images.shape[0]))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print("Iter: {}".format(num_iter))
                for loss_idx in range(len(total_losses)):
                    print("stage{}_heatmaps_loss: {}".format(
                        loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        snapshot_name = "{}/checkpoint_last_epoch.pth".format(checkpoints_folder)
        torch.save(
            {
                "state_dict": net.module.state_dict(),
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
                "iter": num_iter,
                "current_epoch": epochId,
            },
            snapshot_name,
        )
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = "{}/checkpoint_epoch_{}.pth".format(checkpoints_folder,
                                                                epochId)
            torch.save(
                {
                    "state_dict": net.module.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "scheduler": scheduler.state_dict(),
                    "iter": num_iter,
                    "current_epoch": epochId,
                },
                snapshot_name,
            )

        print("Validation...")
        net.eval()
        eval_num = 1000
        val_dataset = LipValDataset(images_folder, eval_num)
        predictions_name = "{}/val_results.csv".format(checkpoints_folder)
        evaluate(val_dataset, predictions_name, net)
        pck = calc_pckh(val_dataset.labels_file_path, predictions_name,
                        eval_num=eval_num)
        val_loss = 100 - pck[-1][-1]  # 100 - average PCKh
        print("Val loss: {}".format(val_loss))
        scheduler.step(val_loss, epochId)
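# SinglePersonFlip above mirrors the image and must also swap left/right
# keypoint identities, or flipped samples would teach the net the wrong side.
# A minimal sketch of that idea for (N, 2) keypoint arrays; this is an
# assumption about the transform's behavior, not the repo's implementation:
def _demo_flip(image, keypoints, left_indices, right_indices):
    import numpy as np
    flipped = image[:, ::-1].copy()                # mirror horizontally
    kps = keypoints.copy()
    kps[:, 0] = image.shape[1] - 1 - kps[:, 0]     # mirror x coordinates
    for l, r in zip(left_indices, right_indices):  # swap left/right labels
        kps[[l, r]] = kps[[r, l]]
    return flipped, kps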
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after):
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages,
                                                  num_heatmaps=18).cuda()
    train_dataset = dtst_train(images_folder, STRIDE, SIGMA,
                               transform=transforms.Compose([
                                   SinglePersonBodyMasking(),
                                   ChannelPermutation(),
                                   SinglePersonRotate(pad=(128, 128, 128),
                                                      max_rotate_degree=40),
                                   SinglePersonCropPad(pad=(128, 128, 128),
                                                       crop_x=256, crop_y=256),
                                   SinglePersonFlip()
                               ]))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)
    val_dataset = dtst_val(images_folder, STRIDE, SIGMA)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=num_workers)
    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                     patience=5, threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                # Round to a multiple of log_after so the loss printed after
                # resuming is averaged over a full logging window.
                num_iter = num_iter // log_after * log_after
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net)
    net.train()
    for epochId in range(current_epoch, 100):
        print('Epoch: {}'.format(epochId))
        N_losses = num_refinement_stages + 1
        total_losses = [0] * N_losses  # heatmap loss per stage
        for batch in train_loader:
            images = batch['image'].cuda()
            keypoint_maps = batch['keypoint_maps'].cuda()
            stages_output = net(images)

            losses = []
            for loss_idx in range(N_losses):
                loss = l2_loss(stages_output[loss_idx], keypoint_maps, len(images))
                losses.append(loss)
                total_losses[loss_idx] += loss.item()

            optimizer.zero_grad()
            loss = losses[0]
            for i in range(1, N_losses):
                loss += losses[i]
            loss.backward()
            optimizer.step()
            num_iter += 1
            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                # for loss_idx in range(N_losses):
                #     print('stage{}_heatmaps_loss: {}'.format(
                #         loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(N_losses):
                    total_losses[loss_idx] = 0
                validate2(epochId, net, val_loader, scheduler)

        snapshot_name = '{}/{}_epoch_last.pth'.format(checkpoints_folder, DATASET)
        torch.save(
            {
                'state_dict': net.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'iter': num_iter,
                'current_epoch': epochId
            }, snapshot_name)
        if epochId % checkpoint_after == 0:
            snapshot_name = '{}/{}_epoch_{}.pth'.format(checkpoints_folder,
                                                        DATASET, epochId)
            torch.save(
                {
                    'state_dict': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'iter': num_iter,
                    'current_epoch': epochId
                }, snapshot_name)
        validate2(epochId, net, val_loader, scheduler)
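# The datasets above turn keypoint coordinates into ground-truth heatmaps: one
# channel per keypoint at 1/STRIDE resolution, with a Gaussian of the given
# SIGMA around each point. A sketch of that encoding (an assumption about how
# the datasets build keypoint_maps, not the repo's exact code):
def _demo_keypoint_map(kp_x, kp_y, img_size=256, stride=8, sigma=7):
    import numpy as np
    side = img_size // stride
    # Grid of heatmap-cell positions expressed in input-image pixels.
    ys, xs = np.mgrid[0:side, 0:side] * stride
    d2 = (xs - kp_x) ** 2 + (ys - kp_y) ** 2
    return np.exp(-d2 / (2 * sigma ** 2))  # peaks at 1.0 on the keypoint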
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after):
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages).cuda()
    stride = 8
    sigma = 7
    dataset = LipTrainDataset(images_folder, stride, sigma,
                              transform=transforms.Compose([
                                  SinglePersonBodyMasking(),
                                  ChannelPermutation(),
                                  SinglePersonRotate(pad=(128, 128, 128),
                                                     max_rotate_degree=40),
                                  SinglePersonCropPad(pad=(128, 128, 128),
                                                      crop_x=256, crop_y=256),
                                  SinglePersonFlip()
                              ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)
    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                     patience=5, threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                # Round to a multiple of log_after so the loss printed after
                # resuming is averaged over a full logging window.
                num_iter = num_iter // log_after * log_after
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net)
    net.train()
    for epochId in range(current_epoch, 100):
        print('Epoch: {}'.format(epochId))
        net.train()
        total_losses = [0] * (num_refinement_stages + 1)  # heatmap loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()
            images = batch_data['image'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses)):
                losses.append(l2_loss(stages_output[loss_idx], keypoint_maps,
                                      images.shape[0]))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses)):
                    print('stage{}_heatmaps_loss: {}'.format(
                        loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        snapshot_name = '{}/checkpoint_last_epoch.pth'.format(checkpoints_folder)
        torch.save({'state_dict': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'iter': num_iter,
                    'current_epoch': epochId}, snapshot_name)
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = '{}/checkpoint_epoch_{}.pth'.format(checkpoints_folder,
                                                                epochId + 1)
            torch.save({'state_dict': net.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'iter': num_iter,
                        'current_epoch': epochId}, snapshot_name)

        print('Validation...')
        net.eval()
        eval_num = 1000
        val_dataset = LipValDataset(images_folder, eval_num)
        predictions_name = '{}/val_results.csv'.format(checkpoints_folder)
        evaluate(val_dataset, predictions_name, net)
        pck = calc_pckh(val_dataset.labels_file_path, predictions_name,
                        eval_num=eval_num)
        val_loss = 100 - pck[-1][-1]  # 100 - average PCKh
        print('Val loss: {}'.format(val_loss))
        scheduler.step(val_loss, epochId)
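# All of these loops call l2_loss(prediction, target, batch_size) from the
# repo's modules. A plausible definition consistent with how it is called here
# -- an assumption for illustration, not the repo's verified code:
def _demo_l2_loss(pred, target, batch_size):
    diff = pred - target
    # Summed squared error over all heatmap pixels, averaged per sample.
    return (diff * diff).sum() / 2 / batch_size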
def convert_to_onnx(net, output_name):
    input_names = ['data']
    dummy_input = torch.randn(1, 3, 256, 256)  # fixed 256x256 single-person input
    output_names = ['stage_1_output_1_heatmaps', 'stage_2_output_1_heatmaps']
    torch.onnx.export(net, dummy_input, output_name, verbose=True,
                      input_names=input_names, output_names=output_names)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', type=str, required=True,
                        help='path to the checkpoint')
    parser.add_argument('--output-name', type=str,
                        default='human-pose-estimation.onnx',
                        help='name of output model in ONNX format')
    args = parser.parse_args()

    net = SinglePersonPoseEstimationWithMobileNet(to_onnx=True)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)
    convert_to_onnx(net, args.output_name)
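# Once exported, the model can be sanity-checked outside PyTorch. A minimal
# sketch using onnxruntime (assumes the onnxruntime package is installed; the
# 'data' input name matches the export above):
def _demo_onnx_inference(onnx_path="human-pose-estimation.onnx"):
    import numpy as np
    import onnxruntime as ort
    session = ort.InferenceSession(onnx_path, providers=['CPUExecutionProvider'])
    dummy = np.random.randn(1, 3, 256, 256).astype(np.float32)
    outputs = session.run(None, {'data': dummy})  # one heatmap tensor per stage
    return outputs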
def genSingleImg(dataFolder=r"D:\work\pycharmproject\Real2Animation-video-generation\demo2\reference"):
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment_name', type=str, default='test',
                        help='name of output file with detected keypoints')
    parser.add_argument('--multiscale', action='store_true',
                        help='average inference results over multiple scales')
    parser.add_argument('--visualize', action='store_true',
                        help='show keypoints')
    parser.add_argument('--save_maps', action='store_true',
                        help='save predicted heatmaps')
    parser.add_argument('--checkpoint-path', type=str,
                        default="D:/work/pycharmproject/Real2Animation-video-generation/pose_estimate/gccpm-look-into/"
                                "checkpoints/checkpoint_anime_47.pth",
                        help='path to the checkpoint')
    parser.add_argument('--dataset_folder', type=str, default="./data_anime",
                        help='path to dataset folder')
    parser.add_argument('--num_kps', type=int, default=21,  # 16 for real, 21 for anime
                        help='number of key points')
    args = parser.parse_args()

    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages=5,
                                                  num_heatmaps=args.num_kps + 1)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)

    results_folder = 'test_results/{}{}_test'.format(args.experiment_name, "_final")
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)
    dataset = SingleAnimeDataset(dataFolder)
    # TODO: the datasets have been modified; a shadow-like image is still needed.
    tmp_kps_dir = os.path.join(os.path.dirname(dataFolder), "tmpK")
    if not os.path.exists(tmp_kps_dir):
        os.makedirs(tmp_kps_dir)  # make sure the keypoint dump directory exists

    net = net.cuda().eval()
    base_height = 256
    scales = [1]
    stride = 8
    output_name = os.path.join(results_folder, "kps_results.csv")
    res_file = open(output_name, 'w')
    pose_dir = os.path.join(dataFolder, "pose_dataset")
    if not os.path.exists(pose_dir):
        os.mkdir(pose_dir)

    for sample_id in range(len(dataset)):
        sample = dataset[sample_id]
        file_name = sample['file_name']
        img = sample['image']
        avg_heatmaps = infer(net, img, scales, base_height, stride,
                             num_kps=args.num_kps)
        all_keypoints = []
        for kpt_idx in range(args.num_kps):
            all_keypoints.append(extract_keypoints(avg_heatmaps[:, :, kpt_idx]))
        for kpt_id in range(len(all_keypoints)):
            keypoint = all_keypoints[kpt_id]
            if keypoint[0] != -1:  # skip keypoints that were not detected
                # if colors[kpt_id] == (255, 0, 0):
                #     cv2.circle(img, (int(keypoint[0]), int(keypoint[1])),
                #                radius + 2, (255, 0, 0), -1)
                # else:
                cv2.circle(img, (int(keypoint[0]), int(keypoint[1])),
                           10, (255, 255, 255), -1)
        img_name = os.path.join(pose_dir, file_name)
        cv2.imwrite(img_name, img)
        np.save(os.path.join(tmp_kps_dir, file_name + '.npy'),
                np.array(all_keypoints))
        # Also stored under a fixed name expected by the downstream demo.
        np.save(os.path.join(tmp_kps_dir, "animeImage.jpg.npy"),
                np.array(all_keypoints))
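# genSingleImg relies on extract_keypoints from the repo's modules; the loop
# above treats x == -1 as "keypoint not found". A minimal argmax-with-threshold
# sketch of that contract (an assumption, not the repo's implementation):
def _demo_extract_keypoints(heatmap, threshold=0.1):
    import numpy as np
    y, x = np.unravel_index(np.argmax(heatmap), heatmap.shape)
    if heatmap[y, x] < threshold:
        return [-1, -1]  # keypoint considered missing
    return [int(x), int(y)]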
    parser.add_argument('--checkpoint-path', type=str, required=True,
                        help='path to the checkpoint')
    parser.add_argument('--output-name', type=str,
                        default='human-pose-estimation.onnx',
                        help='name of output model in ONNX format')
    parser.add_argument('--single-person', action='store_true',
                        help='convert model for single-person pose estimation')
    parser.add_argument('--input-size', nargs='+', type=int, required=True,
                        help='size of input image in format: height width')
    parser.add_argument('--mode-interpolation', type=str, required=False,
                        default='bilinear',
                        help='type of interpolation: <bilinear> or <nearest>')
    args = parser.parse_args()

    net = PoseEstimationWithMobileNet()
    if args.single_person:
        net = SinglePersonPoseEstimationWithMobileNet(mode=args.mode_interpolation)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)
    convert_to_onnx(net, args.output_name, args.single_person, args.input_size)
def convert_to_onnx(net, output_name, single_person):
    # Multi-person model: 256x456 input, heatmaps and PAFs from two stages.
    dummy_input = torch.randn(1, 3, 256, 456)
    input_names = ['data']
    output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs',
                    'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs']
    if single_person:
        # Single-person model: square 256x256 input, heatmaps only.
        dummy_input = torch.randn(1, 3, 256, 256)
        output_names = ['stage_0_output_1_heatmaps', 'stage_1_output_1_heatmaps']
    torch.onnx.export(net, dummy_input, output_name, verbose=True,
                      input_names=input_names, output_names=output_names)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', type=str, required=True,
                        help='path to the checkpoint')
    parser.add_argument('--output-name', type=str,
                        default='human-pose-estimation.onnx',
                        help='name of output model in ONNX format')
    parser.add_argument('--single-person', action='store_true',
                        help='convert model for single-person pose estimation')
    args = parser.parse_args()

    net = PoseEstimationWithMobileNet()
    if args.single_person:
        net = SinglePersonPoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)
    convert_to_onnx(net, args.output_name, args.single_person)
    parser.add_argument(
        "--input-size",
        nargs="+",
        type=int,
        required=True,
        help="size of input image in format: height width",
    )
    parser.add_argument(
        "--mode-interpolation",
        type=str,
        required=False,
        default="bilinear",
        help="type of interpolation: <bilinear> or <nearest>",
    )
    parser.add_argument(
        "--num-refinement-stages",
        type=int,
        default=1,
        help="number of refinement stages",
    )
    args = parser.parse_args()

    net = PoseEstimationWithMobileNet()
    if args.single_person:
        net = SinglePersonPoseEstimationWithMobileNet(
            mode=args.mode_interpolation,
            num_refinement_stages=args.num_refinement_stages,
        )
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)
    convert_to_onnx(net, args.output_name, args.single_person, args.input_size)
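# Example conversions with the export scripts above. The script filename
# scripts/convert_to_onnx.py is an assumption and the checkpoint path is a
# placeholder; the flags match the argparse definitions above.
#   python scripts/convert_to_onnx.py --checkpoint-path checkpoint.pth
#   python scripts/convert_to_onnx.py --checkpoint-path checkpoint.pth \
#       --single-person --input-size 256 256 --mode-interpolation nearest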