def main():
    """Set up and launch adversarial adaptation training of the pose network.

    Builds a source and a target ``PoseEstimationWithMobileNet`` initialised
    from the same pretrained checkpoint, a ``Discriminator`` and a BCE
    criterion, then hands everything to ``Trainer`` and starts training.
    All settings are read from the module-level ``CONFIG`` mapping.
    """
    use_cuda = CONFIG["CUDA"] and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    weight_name = CONFIG["model"]["pretrained_weight"]
    # Deserialize on the CPU so a checkpoint saved from a GPU can still be
    # read on a machine without CUDA; the networks are moved afterwards.
    model_dict = torch.load(weight_name, map_location="cpu")

    source_net = PoseEstimationWithMobileNet()
    target_net = PoseEstimationWithMobileNet()
    load_state(source_net, model_dict)
    load_state(target_net, model_dict)

    discriminator = Discriminator()
    criterion = nn.BCELoss()

    if device.type == "cuda":
        # Each network is placed on the GPU index configured for it.
        source_net = source_net.cuda(CONFIG["GPU"]["source_net"])
        target_net = target_net.cuda(CONFIG["GPU"]["target_net"])
    else:
        # Calling .cuda() here would raise when CUDA is disabled or missing.
        source_net = source_net.to(device)
        target_net = target_net.to(device)
    discriminator = discriminator.to(device)
    criterion = criterion.to(device)

    optimizer_tg = torch.optim.Adam(target_net.parameters(),
                                    lr=CONFIG["training_setting"]["t_lr"])
    optimizer_d = torch.optim.Adam(discriminator.parameters(),
                                   lr=CONFIG["training_setting"]["d_lr"])

    dataset = ADDADataset()
    dataloader = DataLoader(dataset, CONFIG["dataset"]["batch_size"],
                            shuffle=True, num_workers=0)

    trainer = Trainer(source_net, target_net, discriminator, dataloader,
                      optimizer_tg, optimizer_d, criterion, device)
    trainer.train()
def __init__(self, checkpoint_path, device,
             img_mean=np.array([128, 128, 128], dtype=np.float32),
             img_scale=np.float32(1/255),
             use_tensorrt=False):
    """Create the inference network from a checkpoint.

    Args:
        checkpoint_path: file passed to ``torch.load``.
        device: any value other than the string ``'CPU'`` requests CUDA,
            which is used only when ``torch.cuda.is_available()``.
        img_mean: stored on the instance as ``self.img_mean``.
        img_scale: stored on the instance as ``self.img_scale``.
        use_tensorrt: when true the checkpoint is loaded into a
            ``torch2trt.TRTModule`` instead of the PyTorch model.
    """
    from models.with_mobilenet import PoseEstimationWithMobileNet
    from modules.load_state import load_state

    self.img_mean = img_mean
    self.img_scale = img_scale

    # CPU is the fallback; GPU is selected only if requested AND present.
    self.device = 'cpu'
    gpu_requested = device != 'CPU'
    if gpu_requested and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    elif gpu_requested:
        print('No CUDA device found, inferring on CPU')

    model = PoseEstimationWithMobileNet()
    weights = torch.load(checkpoint_path, map_location='cpu')
    if not use_tensorrt:
        load_state(model, weights)
    else:
        from torch2trt import TRTModule
        model = TRTModule()
        model.load_state_dict(weights)

    model = model.to(self.device)
    model.eval()
    self.net = model
def main():
    """Run pose estimation on the camera feed of an Anki Vector robot.

    Loads the pose network onto the GPU, connects to the robot, enables the
    camera feed, subscribes the image and touch handlers, and then blocks for
    up to 600 seconds (or until Ctrl+C).
    """
    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load("models/checkpoint_iter_370000.pth", map_location='cpu')
    load_state(net, checkpoint)
    # The network is only used for inference by the camera handler, so put it
    # in evaluation mode like the other loaders do before moving it to the GPU.
    net = net.eval().cuda()

    # Nothing in this function sets the event; wait() below therefore acts as
    # a 600 s keep-alive unless a handler elsewhere signals it.
    done = threading.Event()

    with anki_vector.AsyncRobot() as robot:
        robot.camera.init_camera_feed()
        robot.camera.image_streaming_enabled()

        # preparing robot pose ready
        robot.behavior.set_head_angle(degrees(25.0))
        robot.behavior.set_lift_height(0.0)

        # events for detection and new camera feed
        robot.events.subscribe(on_new_raw_camera_image,
                               events.Events.new_raw_camera_image, net)
        robot.events.subscribe_by_name(on_robot_observed_touch,
                                       event_name='touched')

        print(
            "------ waiting for camera events, press ctrl+c to exit early ------"
        )

        try:
            if not done.wait(timeout=600):
                print("------ Did not receive a new camera image! ------")
        except KeyboardInterrupt:
            pass
def __init__(self):
    """Load the pose network and define the skeleton layout.

    The checkpoint is read on the CPU; the network is put in evaluation mode
    and moved to CUDA only when ``envars.USE_GPU()`` is true.
    """
    self.name = 'OpenPose'

    model = PoseEstimationWithMobileNet()
    load_state(model, torch.load('./checkpoint_iter_370000.pth', map_location='cpu'))
    model = model.eval()
    self.net = model.cuda() if envars.USE_GPU() else model

    # Inference hyper-parameters.
    self.stride = 8
    self.upsample_ratio = 4
    self.height_size = 256

    self.kpt_names = [
        'nose', 'neck',
        'r_sho', 'r_elb', 'r_wri',
        'l_sho', 'l_elb', 'l_wri',
        'r_hip', 'r_knee', 'r_ank',
        'l_hip', 'l_knee', 'l_ank',
        'r_eye', 'l_eye', 'r_ear', 'l_ear',
    ]

    # Each chain lists joints in order; consecutive joints form one limb.
    limb_chains = (
        ('nose', 'r_eye', 'r_ear'),
        ('nose', 'l_eye', 'l_ear'),
        ('nose', 'neck'),
        ('neck', 'r_sho', 'r_elb', 'r_wri'),
        ('neck', 'l_sho', 'l_elb', 'l_wri'),
        ('neck', 'r_hip', 'r_knee', 'r_ank'),
        ('neck', 'l_hip', 'l_knee', 'l_ank'),
    )
    self.connections = [
        pair for chain in limb_chains for pair in zip(chain, chain[1:])
    ]
def __init__(self, checkpoint_path, scale=256.):
    """Wrap a ``PoseEstimationWithMobileNet`` initialised from a checkpoint.

    Args:
        checkpoint_path: file passed to ``torch.load``.
        scale: stored as ``self.scale``.
    """
    super().__init__()
    self.scale = scale

    pose_model = PoseEstimationWithMobileNet()
    # Deserialize on the CPU so checkpoints saved from a GPU also load on
    # machines without CUDA.
    state_dict = torch.load(checkpoint_path, map_location='cpu')
    load_state(pose_model, state_dict)
    self.pose_model = pose_model
def openpose_to_jit():
    """Trace the pose network with a random input and save it as ``test.jit``."""
    # The example input is drawn before the model is built, so the order of
    # random-number consumption is unchanged.
    example_input = torch.randn(1, 3, 256, 456)

    model = PoseEstimationWithMobileNet().cpu()
    weights = torch.load(r'.\weights\checkpoint_iter_370000.pth', map_location='cpu')
    load_state(model, weights)
    model.eval()

    # One ordinary forward pass before tracing.
    model(example_input)

    traced = torch.jit.trace(model, example_input)
    traced.save('test.jit')
def Load_pose2d():
    """Return the 2D pose network in evaluation mode on the CUDA device.

    The checkpoint at ``pose2d_checkpoint`` is deserialized on the CPU, then
    the network is moved to CUDA.
    """
    model = PoseEstimationWithMobileNet()
    load_state(model, torch.load(pose2d_checkpoint, map_location='cpu'))
    return model.eval().cuda()
def main1():
    """Command-line entry point for the pose estimation demo.

    Parses the arguments, loads the network from ``--checkpoint-path`` and
    runs ``run_demo`` on the chosen input. ``--video`` takes precedence over
    ``--images_dir``, which takes precedence over ``--images``.

    Raises:
        ValueError: if none of ``--video``, ``--images`` or ``--images_dir``
            is given.
    """
    parser = argparse.ArgumentParser(
        description='''Lightweight human pose estimation python demo.
                       This is just for quick results preview.
                       Please, consider c++ demo for the best performance.''')
    parser.add_argument('--checkpoint-path', type=str, required=True,
                        help='path to the checkpoint')
    parser.add_argument('--height-size', type=int, default=256,
                        help='network input layer height size')
    parser.add_argument('--video', type=str, default='',
                        help='path to video file or camera id')
    parser.add_argument('--images', nargs='+', default='',
                        help='path to input image(s)')
    parser.add_argument('--images_dir', default='',
                        help='folderpath to input image(s)')
    parser.add_argument('--cpu', action='store_true',
                        help='run network inference on cpu')
    parser.add_argument('--track', type=int, default=1,
                        help='track pose id in video')
    parser.add_argument('--smooth', type=int, default=1,
                        help='smooth pose keypoints')
    args = parser.parse_args()

    if args.video == '' and args.images == '' and args.images_dir == '':
        # Name the flags that actually exist on this parser.
        raise ValueError(
            'Either --video, --images or --images_dir has to be provided')

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    if args.video != '':
        frame_provider = VideoReader(args.video)
    else:
        # Tracking is disabled whenever the input is not a video.
        args.track = 0
        image_source = args.images_dir if args.images_dir != '' else args.images
        frame_provider = ImageReader(image_source)

    run_demo(net, frame_provider, args.height_size, args.cpu, args.track,
             args.smooth)
def init(cpu = False):
    """Load the pose network from ``checkpoint_iter_370000.pth``.

    Args:
        cpu: when true the network stays on the CPU; otherwise it is moved
            to CUDA.

    Returns:
        The network in evaluation mode.
    """
    model = PoseEstimationWithMobileNet()
    # load the existing model
    weights = torch.load("checkpoint_iter_370000.pth", map_location='cpu')
    load_state(model, weights)
    model = model.eval()
    return model if cpu else model.cuda()
def run_demo(args, image_provider, height_size, cpu, track, smooth):
    """Run pose estimation on every image and save the keypoints as JSON.

    Args:
        args: parsed arguments; ``args.checkpoint_path`` is read here and the
            whole object is forwarded to ``save_json``.
        image_provider: iterable of mappings with ``"image"`` and
            ``"image_name"`` entries.
        height_size: forwarded to ``infer_fast``.
        cpu: when true inference runs on the CPU, otherwise on CUDA.
        track: unused here; kept so existing callers keep working.
        smooth: unused here; kept so existing callers keep working.
    """
    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts

    for sample in image_provider:
        img, image_name = sample["image"], sample["image_name"]
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride,
                                                upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx],
                                                     all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type,
                                                      pafs, demo=True)

        # Undo the network stride/upsampling, the padding and the resize
        # scale. Kept as a row loop so an empty keypoint array is handled.
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (
                all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (
                all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

        keypoints_out = []
        for entry in pose_entries:
            if len(entry) == 0:
                continue
            # Rows are (x, y, confidence); -1 marks a keypoint not found.
            pose_keypoints = np.ones((num_keypoints, 3), dtype=np.float32) * -1
            for kpt_id in range(num_keypoints):
                if entry[kpt_id] != -1.0:  # keypoint was found
                    found = all_keypoints[int(entry[kpt_id])]
                    pose_keypoints[kpt_id, 0] = int(found[0])
                    pose_keypoints[kpt_id, 1] = int(found[1])
                # Fixed confidence for found keypoints, zero for missing ones.
                pose_keypoints[kpt_id, 2] = (
                    0.94 if pose_keypoints[kpt_id, 0] != -1 else 0)
            keypoints_out.append(pose_keypoints)

        save_json(image_name, keypoints_out, args)
def load_single_pose_model(model_name, **kwargs):
    """Download (via ``download``) and load a single-person pose model.

    Args:
        model_name: key into ``WEIGHTS`` and file name inside ``base_dir``.
        **kwargs: ``base_dir`` (defaults to ``BASE_DIR`` when missing or
            falsy) and ``device`` (defaults to ``"cpu"``) are consumed here;
            everything else is forwarded to the model constructor.

    Returns:
        The model with the checkpoint weights loaded.
    """
    base_dir = kwargs.pop("base_dir", None) or BASE_DIR
    device = kwargs.pop("device", "cpu")

    download(model_name, base_dir)

    model_options = WEIGHTS[model_name][1]
    model = SinglePersonPoseEstimationWithMobileNet(**model_options, **kwargs)

    weights_file = os.path.join(base_dir, model_name)
    checkpoint = torch.load(weights_file, map_location=torch.device(device))
    load_state(model, checkpoint)
    return model
def __init__(self, model_path: str):
    """Load the pose network and initialise the inference settings.

    Args:
        model_path: checkpoint file passed to ``torch.load``.
    """
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    self.net = PoseEstimationWithMobileNet()
    self.net.to(self.device)
    # The device may be the CPU (see above), so never rely on the device
    # recorded inside the checkpoint: deserialize on the CPU.
    checkpoint = torch.load(model_path, map_location='cpu')
    load_state(self.net, checkpoint)
    self.net.eval()

    self.image = None
    self.avg_heatmap = None
    self.avg_paf = None
    self.track = True

    self.stride = 8
    self.upsample_ratio = 4
    self.height_size = 256
    self.smooth = 1
    self.num_keypoints = Pose.num_kpts
def _pose_dect_init(self, device):
    """Initialize the pose detection model.

    Arguments:
        device {torch.device}: device to implement the models on.

    Returns:
        PoseEstimationWithMobileNet: initialized OpenPose model in
        evaluation mode, placed on ``device``.
    """
    weight_path = self.__params.pose_weights
    model = PoseEstimationWithMobileNet()
    weight = torch.load(weight_path, map_location='cpu')
    load_state(model, weight)
    model = model.eval()
    if device.type != 'cpu':
        # .to(device) honours the device index (e.g. cuda:1); a bare .cuda()
        # would always pick the current default GPU.
        model = model.to(device)

    return model
def loadEmotion():
    """Load every model used by the emotion pipeline.

    Returns:
        A 6-tuple: the pose network (on CUDA), a ``BLmovements`` instance, a
        ``BLdecode`` instance, and the three values returned by
        ``ssd_infer.load()`` in their original order.
    """
    print("-Loading pose estimation neural net...")
    pose_net = PoseEstimationWithMobileNet().cuda()
    weights = torch.load(
        "../lightweight-human-pose-estimation.pytorch/checkpoint_iter_370000.pth",
        map_location='cpu')
    load_state(pose_net, weights)
    print("-Pose estimation neural net loaded")

    print("-Opening body language decoders...")
    movements = BLmovements()
    decoder = BLdecode()
    print("-Body language decoder loaded")

    print("-Loading facial emotion neural net...")
    emotion_net, face_net, image_size = ssd_infer.load()
    print("-Facial emotion neural net loaded")

    return pose_net, movements, decoder, emotion_net, face_net, image_size
def upload():
    """Handle an image upload and return the detected poses as JSON.

    On POST the uploaded file is stored as ``uploads/lastphoto.jpg``, read
    back with OpenCV and passed through ``run_demo``. For any other request
    method the function falls through and returns ``None``.
    """
    if request.method == 'POST':
        file = request.files['file']
        f_name = "lastphoto.jpg"
        # One path for both saving and reading, so the two cannot drift apart.
        saved_path = os.path.join('uploads', f_name)
        file.save(saved_path)
        image = cv2.imread(saved_path, cv2.IMREAD_COLOR)

        net = PoseEstimationWithMobileNet()
        checkpoint = torch.load("checkpoint_iter_370000.pth.tar", map_location='cpu')
        load_state(net, checkpoint)

        return json.dumps(
            {
                'filename': f_name,
                'humans': run_demo(net, image, 256, True)
            },
            cls=NumpyEncoder)
def main():
    """Evaluate a single-person pose network on the anime or LIP test set.

    Parses the command line, loads the checkpoint, creates a time-stamped
    results folder under ``test_results/`` and calls ``evaluate``. The data
    set is "real" when the last component of ``--dataset_folder`` is
    ``data_lip`` and "anime" otherwise.

    For the real (LIP) data set use
    ``--checkpoint-path checkpoints/checkpoint_real.pth
    --dataset_folder ./data_lip --num_kps 16``.
    """
    def parse_bool(text):
        """Turn a command-line string into a bool.

        ``type=bool`` would treat every non-empty string, including
        "False", as True.
        """
        lowered = text.strip().lower()
        if lowered in ('1', 'true', 't', 'yes', 'y'):
            return True
        if lowered in ('', '0', 'false', 'f', 'no', 'n'):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(text))

    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment_name', type=str, default='test',
                        help='name of output file with detected keypoints')
    parser.add_argument('--multiscale', action='store_true',
                        help='average inference results over multiple scales')
    parser.add_argument('--visualize', type=parse_bool, default=False,
                        help='show keypoints')
    parser.add_argument('--get_feature', type=parse_bool, default=False,
                        help='--get_feature')
    parser.add_argument('--dataset_mode', type=parse_bool, default=True,
                        help='generate kps maps dataset for VAE')
    parser.add_argument('--save_maps', action='store_true',
                        help='show keypoints')
    parser.add_argument('--checkpoint-path', type=str,
                        default="checkpoints/checkpoint_anime_47.pth",
                        help='path to the checkpoint')
    parser.add_argument('--dataset_folder', type=str, default="./data_anime",
                        help='path to dataset folder')
    parser.add_argument('--num_kps', type=int, default=21,  # need change 16 for real, 21 for anime
                        help='number of key points')
    args = parser.parse_args()

    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages=5,
                                                  num_heatmaps=args.num_kps + 1)
    # Deserialize on the CPU so GPU-saved checkpoints load everywhere.
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    data_flag = "real" if args.dataset_folder.split("/")[-1] == "data_lip" else "anime"
    date = time.strftime("%m%d-%H%M%S")
    results_folder = 'test_results/{}{}_test'.format(args.experiment_name, date)
    os.makedirs(results_folder, exist_ok=True)

    # Raw strings: the backslashes are path separators, not escapes.
    ori_dataFolder = r"D:\download_cache\VAEmodel\OriFrame"
    map_dataFolder = r"D:\download_cache\anime_data2\train"
    if data_flag == "real":
        dataset = LipTestDataset(ori_dataFolder)
    else:
        dataset = AnimeTestDataset(map_dataFolder)  # TODO I have modified the datasets

    # TODO we need shadow like image.
    evaluate(dataset, results_folder, net, args.multiscale, args.visualize,
             args.save_maps, num_kps=args.num_kps,
             get_feature=args.get_feature, dataset_mode=args.dataset_mode)
def callback(self, data):
    """Estimate the pose in one ROS image message and publish the result.

    The first detected pose is drawn on the frame, which is published on
    ``self.image_pub``. The (x, y) coordinates of its first 8 keypoints are
    published on ``self.keypts_pub`` as a flat ``Float32MultiArray`` of 16
    values. If the message cannot be converted nothing is published. If no
    pose is detected only the unmodified frame is published.

    Args:
        data: incoming image message, converted with
            ``self.bridge.imgmsg_to_cv2(data, "bgr8")``.
    """
    try:
        cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
    except CvBridgeError as e:
        # Without a decoded frame there is nothing to process.
        print(e)
        return

    # Build the network once and reuse it; reloading the checkpoint for
    # every frame is needlessly slow.
    net = getattr(self, "_pose_net", None)
    if net is None:
        net = PoseEstimationWithMobileNet()
        checkpoint = torch.load(
            "/home/zheng/lightweight-human-pose-estimation.pytorch/checkpoint_iter_370000.pth",
            map_location='cpu')
        load_state(net, checkpoint)
        net = net.eval().cuda()
        self._pose_net = net

    height_size = 256
    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    num_published_kpts = 8  # only the first 8 keypoints are sent to ROS

    img = asarray(cv_image)
    # Keep an untouched copy so the blend below really mixes the clean frame
    # with the annotated one.
    orig_img = img.copy()

    # NOTE(review): cpu="store_true" is a truthy string, presumably copied
    # from an argparse definition - confirm what infer_fast expects here.
    heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride,
                                            upsample_ratio, cpu="store_true")

    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx],
                                                 all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs,
                                                  demo=True)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (
            all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (
            all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

    if len(pose_entries) == 0:
        # Nobody in view: forward the plain frame instead of failing on
        # pose_entries[0].
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(img, "bgr8"))
        return

    first_entry = pose_entries[0]
    pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
    for kpt_id in range(num_published_kpts):
        if first_entry[kpt_id] != -1.0:  # keypoint was found
            pose_keypoints[kpt_id, 0] = int(
                all_keypoints[int(first_entry[kpt_id]), 0])
            pose_keypoints[kpt_id, 1] = int(
                all_keypoints[int(first_entry[kpt_id]), 1])

    # One Pose object is enough once all keypoints are filled in.
    pose = Pose(pose_keypoints, first_entry[18])
    pose.draw(img)
    img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)

    # Collect the keypoints in a flat array (x0, y0, x1, y1, ...) for ROS.
    pose_keypoints_ros_data = np.zeros(2 * num_published_kpts)
    for kpt_id in range(num_published_kpts):
        pose_keypoints_ros_data[2 * kpt_id] = pose.keypoints[kpt_id][0]
        pose_keypoints_ros_data[2 * kpt_id + 1] = pose.keypoints[kpt_id][1]

    my_array_for_publishing = Float32MultiArray()
    my_array_for_publishing.data = pose_keypoints_ros_data.tolist()

    self.image_pub.publish(self.bridge.cv2_to_imgmsg(img, "bgr8"))
    self.keypts_pub.publish(my_array_for_publishing)
    cv2.waitKey(2)
import torch
from models.with_mobilenet import PoseEstimationWithMobileNet  # the network architecture used here
from modules.load_state import load_state
from torch2trt import torch2trt  # PyTorch -> TensorRT converter
import time

# PART 1: how to compile and save a model.
checkpoint_path = '/home/nvidia/Documents/poseFINAL/checkpoints/body.pth'  # path to your trained weights

net = PoseEstimationWithMobileNet()  # instance of the network
checkpoint = torch.load(checkpoint_path, map_location='cuda')
load_state(net, checkpoint)  # load the trained weights into the network
net.cuda().eval()

data = torch.rand((1, 3, 256, 344)).cuda()  # random tensor with the shape of your input data

# Uncomment the next two lines to (re)build the compiled model.
# model_trt = torch2trt(net, [data])  # creates the compiled version of the model; it takes a while
# torch.save(model_trt.state_dict(), 'net_trt.pth')  # saves the weights of the compiled model, which differ from the original ones

# PART 2: how to load the model once it has been compiled (here it already has been).
from torch2trt import TRTModule  # container class for a compiled model

model_trt = TRTModule()  # instance of the compiled model
model_trt.load_state_dict(torch.load('net_trt.pth'))  # load the compiled weights into the compiled model
def train(images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter,
          num_workers, checkpoint_path, weights_only, from_mobilenet, checkpoints_folder,
          log_after, checkpoint_after):
    """Train the single-person pose network on ``CocoSingleTrainDataset``.

    Runs epochs from the resume point up to 210. Gradients are accumulated
    over ``batches_per_iter`` mini-batches before each optimizer step. After
    every epoch a "last epoch" checkpoint is written, validation is run and
    the learning-rate scheduler is stepped.

    Args:
        images_folder: dataset root, used for both training and validation.
        num_refinement_stages: passed to the network; there is one loss per
            stage plus one (``num_refinement_stages + 1`` losses).
        base_lr: learning rate for Adam.
        batch_size: mini-batch size of the data loader.
        batches_per_iter: mini-batches accumulated per optimizer step.
        num_workers: worker count of the data loader.
        checkpoint_path: checkpoint to start from; falsy to start fresh.
        weights_only: when true, optimizer/scheduler/counters are not
            restored from the checkpoint.
        from_mobilenet: when true, the checkpoint is loaded with
            ``load_from_mobilenet`` instead of ``load_state``.
        checkpoints_folder: directory for checkpoints and validation results.
        log_after: losses are printed when ``num_iter`` is a multiple of it.
        checkpoint_after: an extra per-epoch checkpoint is kept every this
            many epochs.
    """
    dataset = CocoSingleTrainDataset(images_folder,
                                     transform=transforms.Compose([
                                         HalfBodyTransform(),
                                         RandomScaleRotate(),
                                         SinglePersonFlip(left_keypoints_indice=
                                                          CocoSingleTrainDataset.left_keypoints_indice,
                                                          right_keypoints_indice=
                                                          CocoSingleTrainDataset.right_keypoints_indice),
                                         SinglePersonRandomAffineTransform(),
                                         SinglePersonBodyMasking(),
                                         Normalization(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                                         ChannelPermutation()
                                         ]))
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages, num_heatmaps=dataset._num_keypoints,
                                                  mode='nearest').cuda()
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    optimizer = optim.Adam(net.parameters(), lr=base_lr)
    # Learning rate is multiplied by 0.1 at epochs 170 and 200.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [170, 200], 0.1)

    num_iter = 0
    current_epoch = 0
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                # Full resume: restore the training state and continue with
                # the epoch after the one stored in the checkpoint.
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']+1

    net = DataParallel(net)
    net.train()
    for epochId in range(current_epoch, 210):
        print('Epoch: {}'.format(epochId))
        # Validation at the end of the previous epoch left the net in eval mode.
        net.train()
        total_losses = [0] * (num_refinement_stages + 1)  # heatmaps loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation window.
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].float().cuda()
            # NOTE(review): unlike the images, the target maps are not moved
            # to CUDA here - confirm mse_loss handles the device transfer.
            keypoint_maps = batch_data['keypoint_maps']

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses)):
                # Every third value of 'keypoints' starting at index 2 is
                # passed as the third argument of mse_loss.
                losses.append(mse_loss(stages_output[loss_idx], keypoint_maps,
                                       batch_data['keypoints'][:, 2::3].view(batch_data['keypoints'].shape[0], -1, 1)))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = 0
            for loss_idx in range(len(losses)):
                loss += losses[loss_idx]
            # Scale so the accumulated gradient is an average over the window.
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                # Window not complete yet: skip logging for this mini-batch.
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses)):
                    print('\n'.join(['stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        # Overwritten every epoch; net.module is the model inside DataParallel.
        snapshot_name = '{}/checkpoint_last_epoch.pth'.format(checkpoints_folder)
        torch.save({'state_dict': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'iter': num_iter,
                    'current_epoch': epochId},
                   snapshot_name)
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = '{}/checkpoint_epoch_{}.pth'.format(checkpoints_folder, epochId)
            torch.save({'state_dict': net.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'iter': num_iter,
                        'current_epoch': epochId},
                       snapshot_name)

        print('Validation...')
        net.eval()
        val_dataset = CocoSingleValDataset(images_folder, transform=transforms.Compose([
            SinglePersonRandomAffineTransform(mode='val'),
            Normalization(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]))
        predictions_name = '{}/val_results2.json'.format(checkpoints_folder)
        val_loss = val(net, val_dataset, predictions_name, 'CocoSingle')
        print('Val loss: {}'.format(val_loss))
        scheduler.step()
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet,
          checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name,
          checkpoint_after, val_after):
    """Train the multi-person pose network, with interactive debug displays.

    Runs epochs from the resume point up to 280. Gradients are accumulated
    over ``batches_per_iter`` mini-batches before each optimizer step.
    Logging, checkpointing and validation are triggered by the iteration
    counter ``num_iter``.

    NOTE: for every mini-batch this function sends images to ``vis`` and
    opens an OpenCV window that blocks in ``cv2.waitKey(0)`` until a key is
    pressed; pressing Esc terminates the process.

    Args:
        prepared_train_labels: labels passed to ``CocoTrainDataset``.
        train_images_folder: images passed to ``CocoTrainDataset``.
        num_refinement_stages: passed to the network; two losses (heatmaps
            and PAFs) are kept per stage, for ``num_refinement_stages + 1``
            stages.
        base_lr: base learning rate; several parameter groups use multiples.
        batch_size: mini-batch size of the data loader.
        batches_per_iter: mini-batches accumulated per optimizer step.
        num_workers: worker count of the data loader.
        checkpoint_path: checkpoint to start from; falsy to start fresh.
        weights_only: when true, optimizer/scheduler/counters are not
            restored from the checkpoint.
        from_mobilenet: when true, the checkpoint is loaded with
            ``load_from_mobilenet`` instead of ``load_state``.
        checkpoints_folder: directory where checkpoints are written.
        log_after: losses are printed when ``num_iter`` is a multiple of it.
        val_labels: forwarded to ``evaluate``.
        val_images_folder: forwarded to ``evaluate``.
        val_output_name: forwarded to ``evaluate``.
        checkpoint_after: a checkpoint is saved when ``num_iter`` is a
            multiple of it.
        val_after: validation runs when ``num_iter`` is a multiple of it.
    """
    net = PoseEstimationWithMobileNet(num_refinement_stages)

    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()
                               ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    # Per-group settings: groups without their own 'lr'/'weight_decay' fall
    # back to the defaults given at the end of the call.
    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)

        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                # Full resume; training restarts at the stored epoch itself.
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    for epochId in range(current_epoch, 280):
        # The scheduler is stepped at the start of each epoch.
        scheduler.step()
        total_losses = [0, 0] * (num_refinement_stages + 1 )  # heatmaps loss, paf loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation window.
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            # print("show imgs"
            #       , batch_data['keypoint_maps'].shape, batch_data['paf_maps'].shape
            #       , batch_data['keypoint_mask'].shape, batch_data['paf_mask'].shape
            #       , batch_data['mask'].shape, batch_data['image'].shape
            #       )
            # print("seg", batch_data['label']['segmentations'])
            print("batched images size", batch_data['image'].shape)

            # Debug output: send the batch and its targets to `vis`. The image
            # channels are reordered with [2, 1, 0] and shifted by 0.5 first.
            vis.images(batch_data['image'][:, [2, 1, 0], ...] + 0.5, 4, 2, "1", opts=dict(title="img"))
            vis.images(batch_data['keypoint_mask'].permute(1, 0, 2, 3), 4, 2, "2", opts=dict(title="kp_mask"))
            vis.images(batch_data['paf_mask'].permute(1, 0, 2, 3), 4, 2, "3", opts=dict(title="paf_mask"))
            vis.images(batch_data['keypoint_maps'].permute(1, 0, 2, 3), 4, 2, "4", opts=dict(title="keypoint_maps"))
            vis.images(batch_data['paf_maps'].permute(1, 0, 2, 3), 4, 2, "5", opts=dict(title="paf_maps"))
            vis.images(batch_data['mask'].unsqueeze(0), 4, 2, "6", opts=dict(title="MASK"))

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            # Debug output: draw the PAF vectors of the first sample in the
            # batch as short line segments on a blank canvas.
            pafs = batch_data['paf_maps'][0].permute(1, 2, 0).numpy()
            scale = 4
            img_p = np.zeros((pafs.shape[1] * 8, pafs.shape[0] * 8, 3), dtype=np.uint8)
            # pafs[pafs < 0.07] = 0
            for idx in range(len(BODY_PARTS_PAF_IDS)):
                # print(pp, pafs.shape)
                pp = BODY_PARTS_PAF_IDS[idx]
                k_idx = BODY_PARTS_KPT_IDS[idx]
                cc = BODY_CONN_COLOR[idx]
                vx = pafs[:, :, pp[0]]
                vy = pafs[:, :, pp[1]]
                for i in range(pafs.shape[1]):
                    for j in range(pafs.shape[0]):
                        a = (i * 2 * scale, j * 2 * scale)
                        b = (2 * int((i + vx[j, i] * 3) * scale), 2 * int(
                            (j + vy[j, i] * 3) * scale))
                        # Skip cells whose vector rounds to zero length.
                        if a[0] == b[0] and a[1] == b[1]:
                            continue
                        cv2.line(img_p, a, b, cc, 1)
                # break

            cv2.imshow("paf", img_p)
            # Blocks until a key is pressed; Esc aborts the whole run.
            key = cv2.waitKey(0)
            if key == 27:  # esc
                exit(0)

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses) // 2):
                # Outputs alternate per stage: even index is compared with the
                # keypoint maps, odd index with the PAF maps.
                losses.append(
                    l2_loss(stages_output[loss_idx * 2], keypoint_maps,
                            keypoint_masks, images.shape[0]))
                losses.append(
                    l2_loss(stages_output[loss_idx * 2 + 1], paf_maps,
                            paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            # Scale so the accumulated gradient is an average over the window.
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                # Window not complete yet: skip logging/checkpoint/validation.
                continue

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses) // 2):
                    print('\n'.join([
                        'stage{}_pafs_loss:     {}',
                        'stage{}_heatmaps_loss: {}'
                    ]).format(loss_idx + 1,
                              total_losses[loss_idx * 2 + 1] / log_after,
                              loss_idx + 1,
                              total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0
            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}.pth'.format(
                    checkpoints_folder, num_iter)
                # net.module is the model inside the DataParallel wrapper.
                torch.save(
                    {
                        'state_dict': net.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'iter': num_iter,
                        'current_epoch': epochId
                    }, snapshot_name)
            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                # Switch back to training mode after validation.
                net.train()
def gen():
    """Video streaming generator function.

    Listens on TCP port 8088, accepts one client and reads length-prefixed
    pickled frames from it (a native-order unsigned 32-bit size followed by
    the payload). Each frame is passed through ``run_demo``; the knee angle
    at keypoints 8-9-10 drives a squat counter stored in the module-level
    ``count``. Every processed frame is yielded as one part of a
    ``multipart`` JPEG stream.

    The generator ends when the client closes the connection; both sockets
    are closed on exit.
    """
    global count

    HOST = ''
    PORT = 8088

    def fill_buffer(connection, buffer, wanted):
        """Read until ``buffer`` holds ``wanted`` bytes; False if the peer closed."""
        while len(buffer) < wanted:
            chunk = connection.recv(4096)
            if not chunk:
                return False
            buffer.extend(chunk)
        return True

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print('Socket created')
    conn = None
    try:
        s.bind((HOST, PORT))
        print('Socket bind complete')
        s.listen(10)
        print('Socket now listening')

        net = PoseEstimationWithMobileNet()
        checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
        load_state(net, checkpoint)

        conn, _ = s.accept()
        print('ACCENPTED')

        # bytearray grows in place; repeated bytes concatenation is quadratic.
        data = bytearray()
        payload_size = struct.calcsize("=L")

        stepA = False  # a standing position (angle > 140) was seen
        stepB = False  # a sitting position (angle < 70) was seen
        count = 0
        sitAngle = 0
        stdupAngle = 0

        font = cv2.FONT_HERSHEY_SIMPLEX
        bottomLeftCornerOfText = (50, 400)
        topLeft = (150, 400)
        fontScale = 3
        fontColor = (255, 0, 0)
        lineType = 2

        while True:
            if not fill_buffer(conn, data, payload_size):
                break  # client disconnected
            print('MESSAGESIZE')
            print(payload_size)
            msg_size = struct.unpack_from("=L", data)[0]
            del data[:payload_size]
            print('unpack')

            # Retrieve all data based on message size
            if not fill_buffer(conn, data, msg_size):
                break  # client disconnected in the middle of a frame
            print(len(data))
            print(msg_size)
            print('RECIEVED')
            frame_data = bytes(data[:msg_size])
            del data[:msg_size]

            # Extract frame
            # NOTE(security): pickle.loads executes arbitrary code contained
            # in the payload; only expose this port to trusted senders.
            frame = pickle.loads(frame_data)

            pose = run_demo(net, frame, 256, 0, 0, 1)
            if pose is not None:
                pose.draw(frame)

            if cv2.waitKey(1) == ord('q'):
                break

            if pose is not None:
                A = np.array([pose.keypoints[8][0], pose.keypoints[8][1]])
                B = np.array([pose.keypoints[9][0], pose.keypoints[9][1]])
                C = np.array([pose.keypoints[10][0], pose.keypoints[10][1]])

                BA = A - B
                BC = C - B
                cosine_angle = np.dot(
                    BA, BC) / (np.linalg.norm(BA) * np.linalg.norm(BC))
                # Rounding can push the cosine slightly outside [-1, 1].
                angle = np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

                if angle > 140:
                    stepA = True
                    # 0 means "not recorded yet"; otherwise keep the maximum.
                    stdupAngle = angle if stdupAngle == 0 else max(stdupAngle, angle)
                if angle < 70:
                    stepB = True
                    # 0 means "not recorded yet"; otherwise keep the minimum.
                    sitAngle = angle if sitAngle == 0 else min(sitAngle, angle)

                if stepA and stepB:
                    if angle > 140:
                        # A sit followed by a stand completes one repetition.
                        stepA = False
                        stepB = False
                        count += 1
                        if sitAngle > 60:
                            cv2.putText(frame, "Bend your knee more", topLeft,
                                        font, fontScale, fontColor, lineType)
                        stdupDiff = abs(140 - stdupAngle)
                        sitDiff = abs(70 - sitAngle)
                        correctness = 280 - (stdupDiff + sitDiff)
                        cv2.putText(frame, "correctness" + str(correctness),
                                    bottomLeftCornerOfText, font, fontScale,
                                    fontColor, lineType)
                        sitAngle = 0
                        stdupAngle = 0
                if stepA and not stepB:
                    if angle > 140:
                        # Standing without a preceding sit: start over.
                        stepA = False
                        stepB = False
                        sitAngle = 0
                        stdupAngle = 0

            cv2.putText(frame, "count" + str(count), bottomLeftCornerOfText,
                        font, fontScale, fontColor, lineType)
            print(count)

            encode_return_code, image_buffer = cv2.imencode('.jpg', frame)
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + image_buffer.tobytes() +
                   b'\r\n')
    finally:
        if conn is not None:
            conn.close()
        s.close()
def train(
        images_folder,
        num_refinement_stages,
        base_lr,
        batch_size,
        batches_per_iter,
        num_workers,
        checkpoint_path,
        weights_only,
        from_mobilenet,
        checkpoints_folder,
        log_after,
        checkpoint_after,
):
    """Train the single-person pose network on ``LipTrainDataset``.

    Runs epochs from the resume point up to 100. Gradients are accumulated
    over ``batches_per_iter`` mini-batches before each optimizer step. After
    every epoch a "last epoch" checkpoint is written and the network is
    evaluated on 1000 validation samples; ``100 - pck[-1][-1]`` is used as
    the validation loss for the ``ReduceLROnPlateau`` scheduler.

    Args:
        images_folder: dataset root, used for both training and validation.
        num_refinement_stages: passed to the network; there is one loss per
            stage plus one (``num_refinement_stages + 1`` losses).
        base_lr: base learning rate; several parameter groups use multiples.
        batch_size: mini-batch size of the data loader.
        batches_per_iter: mini-batches accumulated per optimizer step.
        num_workers: worker count of the data loader.
        checkpoint_path: checkpoint to start from; falsy to start fresh.
        weights_only: when true, optimizer/scheduler/counters are not
            restored from the checkpoint.
        from_mobilenet: when true, the checkpoint is loaded with
            ``load_from_mobilenet`` instead of ``load_state``.
        checkpoints_folder: directory for checkpoints and validation results.
        log_after: losses are printed when ``num_iter`` is a multiple of it.
        checkpoint_after: an extra per-epoch checkpoint is kept every this
            many epochs.
    """
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages).cuda()
    stride = 8
    sigma = 7
    dataset = LipTrainDataset(
        images_folder,
        stride,
        sigma,
        transform=transforms.Compose([
            SinglePersonBodyMasking(),
            ChannelPermutation(),
            SinglePersonRotate(pad=(128, 128, 128), max_rotate_degree=40),
            SinglePersonCropPad(pad=(128, 128, 128), crop_x=256, crop_y=256),
            SinglePersonFlip(
                left_keypoints_indice=LipTrainDataset.left_keypoints_indice,
                right_keypoints_indice=LipTrainDataset.right_keypoints_indice,
            ),
        ]),
    )
    train_loader = DataLoader(dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)

    # Per-group settings: groups without their own "lr"/"weight_decay" fall
    # back to the defaults given at the end of the call.
    optimizer = optim.Adam(
        [
            {
                "params": get_parameters_conv(net.model, "weight")
            },
            {
                "params": get_parameters_conv_depthwise(net.model, "weight"),
                "weight_decay": 0,
            },
            {
                "params": get_parameters_bn(net.model, "weight"),
                "weight_decay": 0
            },
            {
                "params": get_parameters_bn(net.model, "bias"),
                "lr": base_lr * 2,
                "weight_decay": 0,
            },
            {
                "params": get_parameters_conv(net.cpm, "weight"),
                "lr": base_lr
            },
            {
                "params": get_parameters_conv(net.cpm, "bias"),
                "lr": base_lr * 2,
                "weight_decay": 0,
            },
            {
                "params": get_parameters_conv_depthwise(net.cpm, "weight"),
                "weight_decay": 0,
            },
            {
                "params": get_parameters_conv(net.initial_stage, "weight"),
                "lr": base_lr
            },
            {
                "params": get_parameters_conv(net.initial_stage, "bias"),
                "lr": base_lr * 2,
                "weight_decay": 0,
            },
            {
                "params": get_parameters_bn(net.initial_stage, "weight"),
                "weight_decay": 0,
            },
            {
                "params": get_parameters_bn(net.initial_stage, "bias"),
                "lr": base_lr * 2,
                "weight_decay": 0,
            },
            {
                "params": get_parameters_conv(net.refinement_stages, "weight"),
                "lr": base_lr * 4,
            },
            {
                "params": get_parameters_conv(net.refinement_stages, "bias"),
                "lr": base_lr * 8,
                "weight_decay": 0,
            },
            {
                "params": get_parameters_bn(net.refinement_stages, "weight"),
                "weight_decay": 0,
            },
            {
                "params": get_parameters_bn(net.refinement_stages, "bias"),
                "lr": base_lr * 2,
                "weight_decay": 0,
            },
        ],
        lr=base_lr,
        weight_decay=5e-4,
    )

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.1,
                                                     patience=5,
                                                     threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)

        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                # Full resume: restore the training state and continue with
                # the epoch after the one stored in the checkpoint.
                optimizer.load_state_dict(checkpoint["optimizer"])
                scheduler.load_state_dict(checkpoint["scheduler"])
                num_iter = checkpoint["iter"]
                current_epoch = checkpoint["current_epoch"] + 1

    net = DataParallel(net)
    net.train()
    for epochId in range(current_epoch, 100):
        print("Epoch: {}".format(epochId))
        # Validation at the end of the previous epoch left the net in eval mode.
        net.train()
        total_losses = [0] * (num_refinement_stages + 1
                              )  # heatmaps loss per stage
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation window.
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data["image"].cuda()
            keypoint_maps = batch_data["keypoint_maps"].cuda()

            stages_output = net(images)

            losses = []
            for loss_idx in range(len(total_losses)):
                losses.append(
                    l2_loss(stages_output[loss_idx], keypoint_maps,
                            images.shape[0]))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            # Scale so the accumulated gradient is an average over the window.
            loss /= batches_per_iter
            loss.backward()
            batch_per_iter_idx += 1
            if batch_per_iter_idx == batches_per_iter:
                optimizer.step()
                batch_per_iter_idx = 0
                num_iter += 1
            else:
                # Window not complete yet: skip logging for this mini-batch.
                continue

            if num_iter % log_after == 0:
                print("Iter: {}".format(num_iter))
                for loss_idx in range(len(total_losses)):
                    print("\n".join(["stage{}_heatmaps_loss: {}"]).format(
                        loss_idx + 1, total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        # Overwritten every epoch; net.module is the model inside DataParallel.
        snapshot_name = "{}/checkpoint_last_epoch.pth".format(
            checkpoints_folder)
        torch.save(
            {
                "state_dict": net.module.state_dict(),
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
                "iter": num_iter,
                "current_epoch": epochId,
            },
            snapshot_name,
        )
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = "{}/checkpoint_epoch_{}.pth".format(
                checkpoints_folder, epochId)
            torch.save(
                {
                    "state_dict": net.module.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "scheduler": scheduler.state_dict(),
                    "iter": num_iter,
                    "current_epoch": epochId,
                },
                snapshot_name,
            )

        print("Validation...")
        net.eval()
        eval_num = 1000
        val_dataset = LipValDataset(images_folder, eval_num)
        predictions_name = "{}/val_results.csv".format(checkpoints_folder)
        evaluate(val_dataset, predictions_name, net)
        pck = calc_pckh(val_dataset.labels_file_path,
                        predictions_name,
                        eval_num=eval_num)

        val_loss = 100 - pck[-1][-1]  # 100 - avg_pckh
        print("Val loss: {}".format(val_loss))
        # The plateau scheduler lowers the LR when val_loss stops improving.
        scheduler.step(val_loss, epochId)
def genSingleImg(dataFolder = r"D:\work\pycharmproject\Real2Animation-video-generation\demo2\reference"):
    """Detect keypoints on every sample of ``SingleAnimeDataset(dataFolder)``.

    For each sample the detected keypoints are drawn onto the image, the
    annotated image is written to ``<dataFolder>/pose_dataset/<file_name>``
    and the keypoints are saved to ``<parent of dataFolder>/tmpK/<file_name>.npy``.
    The keypoints of the last processed sample are additionally saved as
    ``tmpK/animeImage.jpg.npy``.

    Model and checkpoint options are read from the process command line via
    ``argparse``; the network is run on the GPU.

    Args:
        dataFolder: Folder passed to ``SingleAnimeDataset``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment_name', type=str, default='test',
                        help='name of output file with detected keypoints')
    parser.add_argument('--multiscale', action='store_true',
                        help='average inference results over multiple scales')
    # NOTE(review): argparse with type=bool turns any non-empty string
    # (including "False") into True. Left unchanged to keep the CLI as is;
    # the value is not read in this function.
    parser.add_argument('--visualize', type=bool, default=False,
                        help='show keypoints')
    parser.add_argument('--save_maps', action='store_true',
                        help='show keypoints')
    parser.add_argument('--checkpoint-path', type=str,
                        default="D:/work/pycharmproject/Real2Animation-video-generation/pose_estimate/gccpm-look-into/"
                                "checkpoints/checkpoint_anime_47.pth",
                        help='path to the checkpoint')
    parser.add_argument('--dataset_folder', type=str, default="./data_anime",
                        help='path to dataset folder')
    parser.add_argument('--num_kps', type=int, default=21,  # need change 16 for real, 21 for anime
                        help='number of key points')
    args = parser.parse_args()

    # One heatmap per keypoint plus one extra channel.
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages=5,
                                                  num_heatmaps=args.num_kps + 1)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)

    results_folder = 'test_results/{}{}_test'.format(args.experiment_name, "_final")
    os.makedirs(results_folder, exist_ok=True)

    dataset = SingleAnimeDataset(dataFolder)
    # TODO I have modified the datasets
    # TODO we need shadow like image.

    # The keypoint dumps go next to dataFolder; create the folder up front so
    # np.save does not fail on a fresh directory tree.
    tmp_kps_dir = os.path.join(os.path.dirname(dataFolder), "tmpK")
    os.makedirs(tmp_kps_dir, exist_ok=True)

    net = net.cuda().eval()
    base_height = 256
    scales = [1]
    stride = 8

    # Nothing is ever written to the results file, but it is still created
    # (empty) as before; the context manager makes sure the handle is closed.
    output_name = os.path.join(results_folder, "kps_results.csv")
    with open(output_name, 'w'):
        pass

    pose_dir = os.path.join(dataFolder, "pose_dataset")
    os.makedirs(pose_dir, exist_ok=True)

    last_keypoints = None
    for sample_id in range(len(dataset)):
        sample = dataset[sample_id]
        file_name = sample['file_name']
        img = sample['image']

        avg_heatmaps = infer(net, img, scales, base_height, stride,
                             num_kps=args.num_kps)
        all_keypoints = [extract_keypoints(avg_heatmaps[:, :, kpt_idx])
                         for kpt_idx in range(args.num_kps)]

        for keypoint in all_keypoints:
            # A first coordinate of -1 is skipped (no point is drawn).
            if keypoint[0] != -1:
                cv2.circle(img, (int(keypoint[0]), int(keypoint[1])),
                           10, (255, 255, 255), -1)

        cv2.imwrite(os.path.join(pose_dir, file_name), img)

        last_keypoints = np.array(all_keypoints)
        np.save(os.path.join(tmp_kps_dir, file_name + '.npy'), last_keypoints)

    # Fixed-name copy of the last sample's keypoints; skipped when the
    # dataset is empty instead of failing on an undefined variable.
    if last_keypoints is not None:
        np.save(os.path.join(tmp_kps_dir, "animeImage.jpg.npy"), last_keypoints)
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after,
          num_kps, finetune=False):
    """Train ``SinglePersonPoseEstimationWithMobileNet`` on LIP or anime data.

    The dataset flavour is chosen from the last component of
    ``images_folder``: ``data_lip`` selects the LIP datasets, anything else
    the anime datasets. After every epoch a checkpoint is written, the model
    is evaluated and the resulting ``100 - avg_pckh`` value drives a
    ``ReduceLROnPlateau`` scheduler.

    Args:
        images_folder: Dataset root folder.
        num_refinement_stages: Number of refinement stages of the network.
        base_lr: Base learning rate; some parameter groups use multiples.
        batch_size: Batch size of the training loader.
        batches_per_iter: Batches accumulated before one optimizer step.
        num_workers: Worker count of the training loader.
        checkpoint_path: Optional checkpoint to start from (falsy to skip).
        weights_only: If true, do not restore optimizer/scheduler/counters.
        from_mobilenet: Load the checkpoint with ``load_from_mobilenet``
            instead of ``load_state``.
        checkpoints_folder: Folder for logs, checkpoints and validation CSV.
        log_after: Log the accumulated losses every this many iterations.
        checkpoint_after: Keep a numbered checkpoint every this many epochs.
        num_kps: Number of keypoints; the network gets ``num_kps + 1`` heatmaps.
        finetune: If true, the backbone parameter groups are left out of the
            optimizer.
    """
    net = SinglePersonPoseEstimationWithMobileNet(
        num_refinement_stages=num_refinement_stages,
        num_heatmaps=num_kps + 1).cuda()
    stride = 8
    sigma = 7
    # num of kps is default 16, +bg=17
    # the img size is arbitrary, flip may not need

    # Compare the last path component, tolerating a trailing separator and
    # Windows-style backslashes (a plain split("/")[-1] yields "" for
    # "data_lip/" and silently selects the anime branch).
    folder_name = images_folder.replace("\\", "/").rstrip("/").split("/")[-1]
    data_flag = "real" if folder_name == "data_lip" else "anime"
    train_log = get_logger(checkpoints_folder, cmd_stream=True)

    augmentations = [
        SinglePersonBodyMasking(),
        ChannelPermutation(),
        SinglePersonRotate(pad=(128, 128, 128), max_rotate_degree=40),
        SinglePersonCropPad(pad=(128, 128, 128), crop_x=256, crop_y=256),
    ]
    if data_flag == "real":
        # Only the LIP pipeline applies the flip transform.
        augmentations.append(SinglePersonFlip())
        dataset = LipTrainDataset(images_folder, stride, sigma,
                                  transform=transforms.Compose(augmentations))
    else:
        dataset = AnimeTrainDataset(images_folder, stride, sigma,
                                    transform=transforms.Compose(augmentations))
    # b=32 default
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)

    backbone_p = [
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    cpm_p = [
        {'params': get_parameters_conv(net.cpm, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
    ]
    initial_p = [
        {'params': get_parameters_conv(net.initial_stage, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    refine_p = [
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]

    # TODO modify params needed update above and change the model structure.
    # NOTE(review): only the backbone groups are treated as conditional on
    # `finetune`; the remaining groups are always optimized - confirm.
    opt_p = []
    if not finetune:
        opt_p += backbone_p
    opt_p += cpm_p
    opt_p += initial_p
    opt_p += refine_p

    optimizer = optim.Adam(opt_p, lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.1,
                                                     patience=5,
                                                     threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                # round iterations, to print proper loss when resuming
                num_iter = num_iter // log_after * log_after
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net, device_ids=[0])
    net.train()
    for epochId in range(current_epoch, 100):
        train_log.debug('Epoch: {}'.format(epochId))
        net.train()

        # heatmaps loss per stage
        total_losses = [0] * (num_refinement_stages + 1)
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation run.
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()

            stages_output = net(images)

            # guess to update the init stage + refinement stages
            losses = []
            for loss_idx in range(len(total_losses)):
                losses.append(
                    l2_loss(stages_output[loss_idx], keypoint_maps,
                            images.shape[0]))
                total_losses[loss_idx] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()

            batch_per_iter_idx += 1
            if batch_per_iter_idx != batches_per_iter:
                continue
            optimizer.step()
            batch_per_iter_idx = 0
            num_iter += 1

            # per `log_after` iterations
            if num_iter % log_after == 0:
                train_log.debug('Iter: {}'.format(num_iter))
                for loss_idx in range(len(total_losses)):
                    train_log.debug('\n'.join([
                        'stage{}_heatmaps_loss: {}'
                    ]).format(loss_idx + 1,
                              total_losses[loss_idx] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

        # The same snapshot is written as the rolling "last epoch" file and,
        # every `checkpoint_after` epochs, as a numbered checkpoint.
        snapshot = {
            'state_dict': net.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'iter': num_iter,
            'current_epoch': epochId
        }
        snapshot_name = '{}/checkpoint_last_epoch.pth'.format(
            checkpoints_folder)
        torch.save(snapshot, snapshot_name)
        if (epochId + 1) % checkpoint_after == 0:
            snapshot_name = '{}/checkpoint_epoch_{}.pth'.format(
                checkpoints_folder, epochId)
            torch.save(snapshot, snapshot_name)

        train_log.debug('Validation...')
        net.eval()
        eval_num = 1000
        if data_flag == "real":
            val_dataset = LipValDataset(images_folder, eval_num)
        else:
            val_dataset = AnimeValDataset(images_folder, eval_num)
        predictions_name = '{}/val_results.csv'.format(checkpoints_folder)
        evaluate(val_dataset, predictions_name, net, num_kps=num_kps)
        pck = calc_pckh(val_dataset.labels_file_path,
                        predictions_name,
                        eval_num=eval_num)

        val_loss = 100 - pck[-1][-1]  # 100 - avg_pckh
        train_log.debug('Val loss: {}'.format(val_loss))
        scheduler.step(val_loss, epochId)
def train(prepared_train_labels, train_images_folder, num_refinement_stages,
          base_lr, batch_size, batches_per_iter, num_workers, checkpoint_path,
          weights_only, from_mobilenet, checkpoints_folder, log_after,
          val_labels, val_images_folder, val_output_name, checkpoint_after,
          val_after):
    """Train ``PoseEstimationWithMobileNet`` on ``CocoTrainDataset``.

    Each stage of the network contributes a keypoint-heatmap loss and a PAF
    loss. Gradients are accumulated over ``batches_per_iter`` batches per
    optimizer step. Losses are logged, checkpoints are written and
    validation is run every ``log_after`` / ``checkpoint_after`` /
    ``val_after`` iterations respectively.

    Args:
        prepared_train_labels: Labels argument passed to ``CocoTrainDataset``.
        train_images_folder: Images folder passed to ``CocoTrainDataset``.
        num_refinement_stages: Number of refinement stages of the network.
        base_lr: Base learning rate; some parameter groups use multiples.
        batch_size: Batch size of the training loader.
        batches_per_iter: Batches accumulated before one optimizer step.
        num_workers: Worker count of the training loader.
        checkpoint_path: Optional checkpoint to start from (falsy to skip).
        weights_only: If true, do not restore optimizer/scheduler/counters.
        from_mobilenet: Load the checkpoint with ``load_from_mobilenet``
            instead of ``load_state``.
        checkpoints_folder: Folder the checkpoints are written to.
        log_after: Logging period in iterations.
        val_labels: Passed through to ``evaluate``.
        val_images_folder: Passed through to ``evaluate``.
        val_output_name: Passed through to ``evaluate``.
        checkpoint_after: Checkpoint period in iterations.
        val_after: Validation period in iterations.
    """
    # The net goes to the GPU *before* the optimizer is built and restored:
    # Optimizer.load_state_dict() casts the restored state to the device the
    # parameters live on at that moment. Restoring while the net is still on
    # the CPU and moving it afterwards leaves Adam's buffers on the CPU, and
    # the first optimizer.step() after a resume fails with a device mismatch.
    net = PoseEstimationWithMobileNet(num_refinement_stages).cuda()

    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels,
                               train_images_folder,
                               stride,
                               sigma,
                               path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()
                               ]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)

    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=drop_after_epoch,
                                               gamma=0.333)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    num_stages = num_refinement_stages + 1
    for epochId in range(current_epoch, 280):
        # NOTE(review): the scheduler is stepped at the start of the epoch,
        # i.e. before optimizer.step(). Recent PyTorch versions expect the
        # opposite order and warn about it; left as is so the effective LR
        # schedule of existing runs does not shift.
        scheduler.step()
        # heatmaps loss, paf loss per stage (interleaved)
        total_losses = [0, 0] * num_stages
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation run.
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            stages_output = net(images)

            # Even output indices are compared with the keypoint maps, odd
            # ones with the PAF maps.
            losses = []
            for loss_idx in range(num_stages):
                losses.append(
                    l2_loss(stages_output[loss_idx * 2], keypoint_maps,
                            keypoint_masks, images.shape[0]))
                losses.append(
                    l2_loss(stages_output[loss_idx * 2 + 1], paf_maps,
                            paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += \
                    losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += \
                    losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()

            batch_per_iter_idx += 1
            if batch_per_iter_idx != batches_per_iter:
                continue
            optimizer.step()
            batch_per_iter_idx = 0
            num_iter += 1

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(num_stages):
                    print('\n'.join([
                        'stage{}_pafs_loss:     {}',
                        'stage{}_heatmaps_loss: {}'
                    ]).format(loss_idx + 1,
                              total_losses[loss_idx * 2 + 1] / log_after,
                              loss_idx + 1,
                              total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}.pth'.format(
                    checkpoints_folder, num_iter)
                torch.save(
                    {
                        'state_dict': net.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict(),
                        'iter': num_iter,
                        'current_epoch': epochId
                    }, snapshot_name)

            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                # Switch back to training mode after the evaluation run.
                net.train()
def train(prepared_train_labels, train_images_folder, num_refinement_stages,
          base_lr, batch_size, batches_per_iter, num_workers, checkpoint_path,
          weights_only, from_mobilenet, checkpoints_folder, log_after,
          val_labels, val_images_folder, val_output_name, checkpoint_after,
          val_after):
    """Train ``PoseEstimationWithMobileNet`` on ``CocoTrainDataset``.

    Each stage of the network contributes a keypoint-heatmap loss and a PAF
    loss. Gradients are accumulated over ``batches_per_iter`` batches per
    optimizer step. Losses are logged, checkpoints are written and
    validation is run every ``log_after`` / ``checkpoint_after`` /
    ``val_after`` iterations respectively.

    Args:
        prepared_train_labels: Labels argument passed to ``CocoTrainDataset``.
        train_images_folder: Images folder passed to ``CocoTrainDataset``.
        num_refinement_stages: Number of refinement stages of the network.
        base_lr: Base learning rate; some parameter groups use multiples.
        batch_size: Batch size of the training loader.
        batches_per_iter: Batches accumulated before one optimizer step.
        num_workers: Worker count of the training loader.
        checkpoint_path: Optional checkpoint to continue from (falsy to skip).
        weights_only: If true, restore only the weights, not the
            optimizer/scheduler state and counters.
        from_mobilenet: Load the checkpoint with ``load_from_mobilenet``
            instead of ``load_state``.
        checkpoints_folder: Folder the checkpoints are written to.
        log_after: Logging period in iterations.
        val_labels: Passed through to ``evaluate``.
        val_images_folder: Passed through to ``evaluate``.
        val_output_name: Passed through to ``evaluate``.
        checkpoint_after: Checkpoint period in iterations.
        val_after: Validation period in iterations.
    """
    # Move the model to the GPU before constructing the optimizer for it.
    # Optimizer.load_state_dict() casts the restored state to the device the
    # parameters are on at that moment; restoring on the CPU and moving the
    # net afterwards leaves Adam's buffers on the CPU, so the first
    # optimizer.step() after a resume fails with a device mismatch.
    net = PoseEstimationWithMobileNet(num_refinement_stages).cuda()

    stride = 8
    sigma = 7
    path_thickness = 1
    dataset = CocoTrainDataset(prepared_train_labels, train_images_folder,
                               stride, sigma, path_thickness,
                               transform=transforms.Compose([
                                   ConvertKeypoints(),
                                   Scale(),
                                   Rotate(pad=(128, 128, 128)),
                                   CropPad(pad=(128, 128, 128)),
                                   Flip()]))
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                              num_workers=num_workers)

    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    # The learning rate is multiplied by `gamma` at these epochs.
    drop_after_epoch = [100, 200, 260]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=drop_after_epoch,
                                               gamma=0.333)
    if checkpoint_path:
        # Continue from a checkpoint: either a full training checkpoint or
        # weights handled by `load_from_mobilenet`.
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)  # target, source
        else:
            load_state(net, checkpoint)
            if not weights_only:
                # Besides the weights, the checkpoint also stores optimizer
                # and scheduler state and the iteration/epoch counters.
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                current_epoch = checkpoint['current_epoch']

    net = DataParallel(net).cuda()
    net.train()
    num_stages = num_refinement_stages + 1
    for epochId in range(current_epoch, 280):  # training stops after epoch 279
        print("This is Epoch No ", str(epochId))
        # NOTE(review): the scheduler is stepped at the start of the epoch,
        # i.e. before optimizer.step(). Recent PyTorch versions expect the
        # opposite order and warn about it; left as is so the effective LR
        # schedule of existing runs does not shift.
        scheduler.step()
        # heatmaps loss, paf loss per stage (interleaved)
        total_losses = [0, 0] * num_stages
        batch_per_iter_idx = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation run.
            if batch_per_iter_idx == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_masks = batch_data['keypoint_mask'].cuda()
            paf_masks = batch_data['paf_mask'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            paf_maps = batch_data['paf_maps'].cuda()

            stages_output = net(images)

            # Even output indices are compared with the keypoint maps, odd
            # ones with the PAF maps.
            losses = []
            for loss_idx in range(num_stages):
                losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps,
                                      keypoint_masks, images.shape[0]))
                losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps,
                                      paf_masks, images.shape[0]))
                total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter
                total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter

            loss = losses[0]
            for loss_idx in range(1, len(losses)):
                loss += losses[loss_idx]
            loss /= batches_per_iter
            loss.backward()

            batch_per_iter_idx += 1
            if batch_per_iter_idx != batches_per_iter:
                continue
            optimizer.step()
            batch_per_iter_idx = 0
            num_iter += 1

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for loss_idx in range(num_stages):
                    print('\n'.join(['stage{}_pafs_loss:     {}', 'stage{}_heatmaps_loss: {}']).format(
                        loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after,
                        loss_idx + 1, total_losses[loss_idx * 2] / log_after))
                for loss_idx in range(len(total_losses)):
                    total_losses[loss_idx] = 0

            if num_iter % checkpoint_after == 0:
                snapshot_name = '{}/checkpoint_iter_{}_after_37000.pth'.format(
                    checkpoints_folder, num_iter)
                torch.save({'state_dict': net.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),
                            'iter': num_iter,
                            'current_epoch': epochId},
                           snapshot_name)

            if num_iter % val_after == 0:
                print('Validation...')
                evaluate(val_labels, val_output_name, val_images_folder, net)
                # Switch back to training mode after the evaluation run.
                net.train()
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after):
    """Train ``SinglePersonPoseEstimationWithMobileNet`` with 18 heatmaps.

    Training data comes from ``dtst_train`` and validation data from
    ``dtst_val`` (both built with the module-level ``STRIDE`` and ``SIGMA``).
    One optimizer step is taken per batch. After every epoch ``validate2`` is
    called and a checkpoint named after the module-level ``DATASET`` is
    written.

    Args:
        images_folder: Dataset root folder.
        num_refinement_stages: Number of refinement stages of the network.
        base_lr: Base learning rate; some parameter groups use multiples.
        batch_size: Batch size of both data loaders.
        batches_per_iter: Accepted for signature compatibility; not used.
        num_workers: Worker count of both data loaders.
        checkpoint_path: Optional checkpoint to start from (falsy to skip).
        weights_only: If true, do not restore optimizer/scheduler/counters.
        from_mobilenet: Load the checkpoint with ``load_from_mobilenet``
            instead of ``load_state``.
        checkpoints_folder: Folder the checkpoints are written to.
        log_after: Period (in iterations) of the iteration log line.
        checkpoint_after: Keep a numbered checkpoint every this many epochs.
    """
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages,
                                                  num_heatmaps=18).cuda()

    train_dataset = dtst_train(images_folder, STRIDE, SIGMA,
                               transform=transforms.Compose([
                                   SinglePersonBodyMasking(),
                                   ChannelPermutation(),
                                   SinglePersonRotate(pad=(128, 128, 128),
                                                      max_rotate_degree=40),
                                   SinglePersonCropPad(pad=(128, 128, 128),
                                                       crop_x=256, crop_y=256),
                                   SinglePersonFlip()
                               ]))
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    val_dataset = dtst_val(images_folder, STRIDE, SIGMA)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=num_workers)

    optimizer = optim.Adam([
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.cpm, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_conv(net.initial_stage, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ], lr=base_lr, weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.1,
                                                     patience=5,
                                                     threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                num_iter = checkpoint['iter']
                # round iterations, to print proper loss when resuming
                num_iter = num_iter // log_after * log_after
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net)
    net.train()
    last_finished_epoch = None
    for epochId in range(current_epoch, 100):
        print('Epoch: {}'.format(epochId))
        # Re-assert training mode every epoch: validate2 is defined elsewhere
        # and may leave the net in eval mode (not verifiable from here).
        net.train()

        N_losses = num_refinement_stages + 1
        total_losses = [0] * N_losses  # heatmaps loss per stage
        for batch in train_loader:
            images = batch['image'].cuda()
            keypoint_maps = batch['keypoint_maps'].cuda()

            stages_output = net(images)

            losses = []
            for loss_idx in range(N_losses):
                loss = l2_loss(stages_output[loss_idx], keypoint_maps,
                               len(images))
                losses.append(loss)
                total_losses[loss_idx] += loss.item()

            optimizer.zero_grad()
            loss = losses[0]
            for i in range(1, N_losses):
                loss += losses[i]
            loss.backward()
            optimizer.step()
            num_iter += 1

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                # The per-stage sums are only reset here; they are not
                # printed.
                for loss_idx in range(N_losses):
                    total_losses[loss_idx] = 0

        # NOTE(review): per-epoch placement of this call is reconstructed
        # from a whitespace-collapsed source - confirm.
        validate2(epochId, net, val_loader, scheduler)

        snapshot = {
            'state_dict': net.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'iter': num_iter,
            'current_epoch': epochId
        }
        snapshot_name = '{}/{}_epoch_last.pth'.format(checkpoints_folder,
                                                      DATASET)
        torch.save(snapshot, snapshot_name)
        if epochId % checkpoint_after == 0:
            snapshot_name = '{}/{}_epoch_{}.pth'.format(
                checkpoints_folder, DATASET, epochId)
            torch.save(snapshot, snapshot_name)
        last_finished_epoch = epochId

    # Final validation pass. The original referenced the undefined name
    # `epochID` here (NameError); use the last finished epoch and skip the
    # call when no epoch ran at all.
    if last_finished_epoch is not None:
        validate2(last_finished_epoch, net, val_loader, scheduler)
def __init__(self, model_path):
    """Build the pose network, load its weights and place it on the GPU.

    Args:
        model_path: Path handed to ``torch.load``; the checkpoint is
            deserialized onto the CPU first (``map_location='cpu'``).
    """
    network = PoseEstimationWithMobileNet()
    weights = torch.load(model_path, map_location='cpu')
    load_state(network, weights)
    # Switch to inference mode, then move to the GPU; the results of both
    # calls are what ends up stored on the instance.
    network = network.eval()
    self.model = network.cuda()
default='data/2.jpg', help='path to input image(s)') parser.add_argument('--cpu', action='store_true', help='run network inference on cpu') parser.add_argument('--track', type=int, default=1, help='track pose id in video') parser.add_argument('--smooth', type=int, default=1, help='smooth pose keypoints') args = parser.parse_args() if args.video == '' and args.images == '': raise ValueError('Either --video or --image has to be provided') net = PoseEstimationWithMobileNet() checkpoint = torch.load(args.checkpoint_path, map_location='cpu') load_state(net, checkpoint) frame_provider = ImageReader(args.images) if args.video != '': frame_provider = VideoReader(args.video) else: args.track = 0 run_demo(net, frame_provider, args.height_size, args.cpu, args.track, args.smooth)
def train(images_folder, num_refinement_stages, base_lr, batch_size,
          batches_per_iter, num_workers, checkpoint_path, weights_only,
          from_mobilenet, checkpoints_folder, log_after, checkpoint_after):
    """Train ``SinglePersonPoseEstimationWithMobileNet`` on ``LipTrainDataset``.

    Gradients are accumulated over ``batches_per_iter`` batches per optimizer
    step. After every epoch a checkpoint is written, the model is evaluated
    on ``LipValDataset`` and the resulting ``100 - avg_pckh`` value is fed to
    a ``ReduceLROnPlateau`` scheduler.

    Args:
        images_folder: Dataset root folder.
        num_refinement_stages: Number of refinement stages of the network.
        base_lr: Base learning rate; some parameter groups use multiples.
        batch_size: Batch size of the training loader.
        batches_per_iter: Batches accumulated before one optimizer step.
        num_workers: Worker count of the training loader.
        checkpoint_path: Optional checkpoint to start from (falsy to skip).
        weights_only: If true, do not restore optimizer/scheduler/counters.
        from_mobilenet: Load the checkpoint with ``load_from_mobilenet``
            instead of ``load_state``.
        checkpoints_folder: Folder for checkpoints and the validation CSV.
        log_after: Log the accumulated losses every this many iterations.
        checkpoint_after: Keep a numbered checkpoint every this many epochs.
    """
    net = SinglePersonPoseEstimationWithMobileNet(num_refinement_stages).cuda()

    stride = 8
    sigma = 7
    augmentation = transforms.Compose([
        SinglePersonBodyMasking(),
        ChannelPermutation(),
        SinglePersonRotate(pad=(128, 128, 128), max_rotate_degree=40),
        SinglePersonCropPad(pad=(128, 128, 128), crop_x=256, crop_y=256),
        SinglePersonFlip()
    ])
    dataset = LipTrainDataset(images_folder, stride, sigma,
                              transform=augmentation)
    train_loader = DataLoader(dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)

    # Parameter groups, listed module by module; the group order is kept
    # because saved optimizer state is matched to groups by position.
    backbone_groups = [
        {'params': get_parameters_conv(net.model, 'weight')},
        {'params': get_parameters_conv_depthwise(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.model, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    cpm_groups = [
        {'params': get_parameters_conv(net.cpm, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.cpm, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_conv_depthwise(net.cpm, 'weight'),
         'weight_decay': 0},
    ]
    initial_groups = [
        {'params': get_parameters_conv(net.initial_stage, 'weight'),
         'lr': base_lr},
        {'params': get_parameters_conv(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.initial_stage, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    refinement_groups = [
        {'params': get_parameters_conv(net.refinement_stages, 'weight'),
         'lr': base_lr * 4},
        {'params': get_parameters_conv(net.refinement_stages, 'bias'),
         'lr': base_lr * 8, 'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'weight'),
         'weight_decay': 0},
        {'params': get_parameters_bn(net.refinement_stages, 'bias'),
         'lr': base_lr * 2, 'weight_decay': 0},
    ]
    optimizer = optim.Adam(
        backbone_groups + cpm_groups + initial_groups + refinement_groups,
        lr=base_lr,
        weight_decay=5e-4)

    num_iter = 0
    current_epoch = 0
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     factor=0.1,
                                                     patience=5,
                                                     threshold=1e-2,
                                                     verbose=True)
    if checkpoint_path:
        checkpoint = torch.load(checkpoint_path)
        if from_mobilenet:
            load_from_mobilenet(net, checkpoint)
        else:
            load_state(net, checkpoint)
            if not weights_only:
                optimizer.load_state_dict(checkpoint['optimizer'])
                scheduler.load_state_dict(checkpoint['scheduler'])
                # round iterations, to print proper loss when resuming
                num_iter = checkpoint['iter'] // log_after * log_after
                current_epoch = checkpoint['current_epoch'] + 1

    net = DataParallel(net)
    net.train()
    stage_count = num_refinement_stages + 1
    for epochId in range(current_epoch, 100):
        print('Epoch: {}'.format(epochId))
        net.train()

        total_losses = [0] * stage_count  # heatmaps loss per stage
        accumulated = 0
        for batch_data in train_loader:
            # Gradients are cleared only at the start of an accumulation run.
            if accumulated == 0:
                optimizer.zero_grad()

            images = batch_data['image'].cuda()
            keypoint_maps = batch_data['keypoint_maps'].cuda()
            stages_output = net(images)

            losses = []
            for stage_idx in range(stage_count):
                stage_loss = l2_loss(stages_output[stage_idx], keypoint_maps,
                                     images.shape[0])
                losses.append(stage_loss)
                total_losses[stage_idx] += stage_loss.item() / batches_per_iter

            loss = losses[0]
            for stage_loss in losses[1:]:
                loss += stage_loss
            loss /= batches_per_iter
            loss.backward()

            accumulated += 1
            if accumulated != batches_per_iter:
                continue
            optimizer.step()
            accumulated = 0
            num_iter += 1

            if num_iter % log_after == 0:
                print('Iter: {}'.format(num_iter))
                for stage_idx, stage_total in enumerate(total_losses):
                    print('stage{}_heatmaps_loss: {}'.format(
                        stage_idx + 1, stage_total / log_after))
                total_losses[:] = [0] * stage_count

        # One snapshot per epoch: always stored as the rolling "last epoch"
        # file and additionally under a numbered name every
        # `checkpoint_after` epochs.
        snapshot = {
            'state_dict': net.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'iter': num_iter,
            'current_epoch': epochId
        }
        torch.save(snapshot,
                   '{}/checkpoint_last_epoch.pth'.format(checkpoints_folder))
        if (epochId + 1) % checkpoint_after == 0:
            torch.save(snapshot,
                       '{}/checkpoint_epoch_{}.pth'.format(
                           checkpoints_folder, epochId + 1))

        print('Validation...')
        net.eval()
        eval_num = 1000
        val_dataset = LipValDataset(images_folder, eval_num)
        predictions_name = '{}/val_results.csv'.format(checkpoints_folder)
        evaluate(val_dataset, predictions_name, net)
        pck = calc_pckh(val_dataset.labels_file_path,
                        predictions_name,
                        eval_num=eval_num)

        val_loss = 100 - pck[-1][-1]  # 100 - avg_pckh
        print('Val loss: {}'.format(val_loss))
        scheduler.step(val_loss, epochId)