Example No. 1
    def __init__(self):
        super(UrbanEnv, self).__init__()

        # Initialize environment parameters
        self.town = carla_config.town
        self.state_y = carla_config.grid_height
        self.state_x = carla_config.grid_width
        self.channel = carla_config.features

        # Sensors
        self.rgb_sensor = None
        self.semantic_sensor = None

        # Planners
        self.planner = None

        # States and actions
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(carla_config.grid_height,
                                                   carla_config.grid_width,
                                                   carla_config.features),
                                            dtype=np.uint8)
        self.action_space = spaces.Discrete(carla_config.N_DISCRETE_ACTIONS)

        self.ego_vehicle = None
        self.current_speed = 0.0

        # Rendering related
        self.renderer = None
        self.is_render_enabled = carla_config.render

        if self.is_render_enabled:
            self.renderer = Renderer()
            self.init_renderer()
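
A minimal sanity-check sketch for the spaces defined above; it assumes a reachable CARLA server (so UrbanEnv() can construct its base class) and the same carla_config fields used in __init__.

env = UrbanEnv()
obs = env.observation_space.sample()
assert obs.shape == (carla_config.grid_height, carla_config.grid_width, carla_config.features)
assert obs.dtype == np.uint8
assert env.action_space.contains(0)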
Example No. 2
    def init_fn(self):
        self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)
        self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)      # feature extraction model
        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.options.lr,
                                          weight_decay=0)
        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=16,
                         create_transl=False).to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss; no reduction so confidence weighting can be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # SMPL parameter loss, used when ground-truth parameters are available
        self.criterion_regr = nn.MSELoss().to(self.device)

        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH
        # Initialize MVSMPLify
        self.mvsmplify = MVSMPLify(step_size=1e-2, batch_size=16, num_iters=100, focal_length=self.focal_length)
        print(self.options.pretrained_checkpoint)
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)
        # Load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)
        # Create renderer
        self.renderer = Renderer(focal_length=self.focal_length, img_res=224, faces=self.smpl.faces)
Example No. 3
    def init_fn(self):
        self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)

        self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.options.lr,
                                          weight_decay=0)
        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=self.options.batch_size,
                         create_transl=False).to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH
        self.conf_thresh = self.options.conf_thresh

        # Initialize SMPLify fitting module
        self.smplify = SMPLify(step_size=1e-2,
                               batch_size=self.options.batch_size,
                               num_iters=self.options.num_smplify_iters,
                               focal_length=self.focal_length,
                               prior_mul=0.1,
                               conf_thresh=self.conf_thresh)
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)

        # Load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)

        # Create renderer
        self.renderer = Renderer(focal_length=self.focal_length, img_res=self.options.img_res, faces=self.smpl.faces)
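
A short sketch of why criterion_keypoints is built with reduction='none': the per-element error is scaled by a per-keypoint confidence before averaging, as in the keypoint_loss of Example No. 20. The joint count of 49 (25 OpenPose + 24 ground-truth joints) is an assumption for illustration.

import torch
import torch.nn as nn

criterion_keypoints = nn.MSELoss(reduction='none')
pred_keypoints_2d = torch.rand(8, 49, 2)   # (batch, joints, xy)
gt_keypoints_2d = torch.rand(8, 49, 3)     # last channel stores confidence
conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1)
loss = (conf * criterion_keypoints(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean()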
Example No. 4
    def __init__(self, light_variability=(20,8), gridlines_on=None, gridlines_width=None, gridlines_spacing=None):
        if gridlines_on or gridlines_width or gridlines_spacing:
            assert not (gridlines_on is None\
                or gridlines_width is None\
                or gridlines_spacing is None),\
                "All gridlines variables must be set if any are"

        self.rend = Renderer()

        self.shapes = []
        self.grid_shapes = []

        self.center = np.array((0, 140, 300))

        self.light_variability = light_variability

        self.background_prims = []
        background_lower_bound = -1e3
        background_upper_bound = 1e3
        wall_bound = 1e3
        self.background_prims.append(
            Tri([(-wall_bound, 0, wall_bound),
                (wall_bound, 0, wall_bound),
                (-wall_bound, 0, -wall_bound)]))
        self.background_prims.append(
            Tri([(-wall_bound, 0, -wall_bound),
                (wall_bound, 0, wall_bound),
                (wall_bound, 0, -wall_bound)]))
        self.background_prims.append(
            Tri([(-wall_bound, -50, wall_bound),
                (0, wall_bound, wall_bound),
                (wall_bound, -50, wall_bound)]))

        if gridlines_on:
            for i in range(int((background_upper_bound - background_lower_bound) / (gridlines_width + gridlines_spacing))):
                offset = i * (gridlines_width + gridlines_spacing)
                self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_lower_bound),
                                            (background_lower_bound + offset, 0.01, background_upper_bound),
                                            (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)]))
                self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_upper_bound),
                                            (background_lower_bound + gridlines_width + offset, 0.01, background_upper_bound),
                                            (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)]))
                self.grid_shapes.append(Tri([(background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset),
                                             (background_upper_bound, 0.01, background_lower_bound + offset),
                                             (background_lower_bound, 0.01, background_lower_bound + offset)]))
                self.grid_shapes.append(Tri([(background_upper_bound, 0.01, background_lower_bound + offset),
                                             (background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset),
                                            (background_upper_bound, 0.01, background_lower_bound + gridlines_width + offset)]))

        self.default_light = np.array((400, 300, -800))
        self.default_intensity = 1000000
        self.camera = Cam((0, 140, 300), (128, 128))
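
A hypothetical instantiation sketch; the enclosing class is not named in this snippet, so Scene is assumed.

scene = Scene(gridlines_on=True, gridlines_width=5, gridlines_spacing=20)  # all three gridline arguments set together
# Setting only some of them trips the assertion above:
# Scene(gridlines_on=True)  ->  AssertionError: All gridlines variables must be set if any are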
Example No. 5
def render_plot(img, poses, bboxes):
    renderer = Renderer(vertices_path="./pose_references/vertices_trans.npy",
                        triangles_path="./pose_references/triangles.npy")

    (w, h) = img.size
    image_intrinsics = np.array([[w + h, 0, w // 2], [0, w + h, h // 2],
                                 [0, 0, 1]])

    trans_vertices = renderer.transform_vertices(img, poses)
    img = renderer.render(img, trans_vertices, alpha=1)
    # for bbox in bboxes:
    #     bbox = bbox.astype(np.uint8)
    #     print(bbox)
    #     img = cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255,0,0), 2)
    return img
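
A hedged usage sketch for render_plot; the image path and arrays are placeholders, and poses is assumed to be whatever the paired pose estimator produces (it is forwarded to renderer.transform_vertices unchanged).

from PIL import Image
import numpy as np

img = Image.open('example.jpg')        # placeholder path; render_plot expects a PIL image (it calls img.size)
poses = np.load('example_poses.npy')   # placeholder: pose estimator output
bboxes = np.zeros((0, 4))              # only used by the commented-out drawing block
out = render_plot(img, poses, bboxes)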
Example No. 6
    def __init__(self, gc, gender):
        data_dir = osp.join(global_var.ROOT, 'pca')
        style_model = np.load(
            osp.join(data_dir, 'style_model_{}.npz'.format(gc)))

        self.renderer = Renderer(512)
        self.img = None
        self.body = trimesh.load(osp.join(global_var.ROOT, 'smpl',
                                          'hres_{}.obj'.format(gender)),
                                 process=False)

        with open(osp.join(global_var.ROOT, 'garment_class_info.pkl'),
                  'rb') as f:
            garment_meta = pickle.load(f)
        # self.vert_indcies = garment_meta[gc]['vert_indices']
        self.f = garment_meta[gc]['f']

        self.gamma = np.zeros([1, 4], dtype=np.float32)
        self.pca = PCA(n_components=4)
        self.pca.components_ = style_model['pca_w']
        self.pca.mean_ = style_model['mean']
        win_name = '{}_{}'.format(gc, gender)
        cv2.namedWindow(win_name)
        cv2.createTrackbar('0', win_name, 100, 200, self.value_change)
        cv2.createTrackbar('1', win_name, 100, 200, self.value_change)
        cv2.createTrackbar('2', win_name, 100, 200, self.value_change)
        cv2.createTrackbar('3', win_name, 100, 200, self.value_change)
        cv2.createTrackbar('trans', win_name, 100, 200, self.value_change)
        self.trans = 0
        self.win_name = win_name

        self.render()
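
The value_change callback is not part of this snippet; a hypothetical version is sketched below to show how the 0-200 trackbars (default 100) could map to PCA coefficients before re-rendering. The scaling factors are assumptions.

    def value_change(self, _):
        # Hypothetical mapping: treat the trackbar midpoint (100) as zero offset.
        for i in range(4):
            v = cv2.getTrackbarPos(str(i), self.win_name)
            self.gamma[0, i] = (v - 100) / 50.0                      # assumed scaling
        self.trans = (cv2.getTrackbarPos('trans', self.win_name) - 100) / 100.0
        self.render()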
Example No. 7
def main():
    model = utils.rnn_model.Model(32, 40, 40, 0.005, 0.0001)
    utils.tensorflow.model_persistor(
        model, checkpoint_dir='./notebooks/.checkpoints/')
    model = model.copy_in_stateful_model()

    Renderer.init_window(1000, 500)

    while True:
        angle = np.random.uniform(0, 2 * np.pi)
        direction = [np.sin(angle), np.cos(angle)]

        simulation = utils.pong.PONGSimulation(W=40, H=40, direction=angle)
        model.init(direction)

        while True:
            key = get_pressed_key()

            movement_left = 1 if key == 'w' else 0
            movement_left = -1 if key == 's' else movement_left

            movement_right = 1 if key == 'up' else 0
            movement_right = -1 if key == 'down' else movement_right

            controls = [movement_left, movement_right]

            frame, _ = simulation.tick(controls)
            pred_frame = model.tick(controls)

            rgb_frame = cmap(frame)
            rgb_pred_frame = cmap(pred_frame)

            print('LEFT: [%s]  --  RIGHT: [%s]' %
                  tuple(' UP ' if i > 0 else 'DOWN' if i < 0 else ' ** '
                        for i in controls))

            split_screen = np.concatenate((rgb_frame, rgb_pred_frame), axis=1)
            Renderer.show_frame(split_screen)

            if key == 'q': return
            if key == 'r': break
Example No. 8
    def init_fn(self):
        # create training dataset
        self.train_ds = create_dataset(self.options.dataset, self.options)

        # create Mesh object
        self.mesh = Mesh()
        self.faces = self.mesh.faces.to(self.device)

        # create GraphCNN
        self.graph_cnn = GraphCNN(self.mesh.adjmat,
                                  self.mesh.ref_vertices.t(),
                                  num_channels=self.options.num_channels,
                                  num_layers=self.options.num_layers).to(
                                      self.device)

        # SMPL Parameter regressor
        self.smpl_param_regressor = SMPLParamRegressor().to(self.device)

        # Setup a joint optimizer for the 2 models
        self.optimizer = torch.optim.Adam(
            params=list(self.graph_cnn.parameters()) +
            list(self.smpl_param_regressor.parameters()),
            lr=self.options.lr,
            betas=(self.options.adam_beta1, 0.999),
            weight_decay=self.options.wd)

        # SMPL model
        self.smpl = SMPL().to(self.device)

        # Create loss functions
        self.criterion_shape = nn.L1Loss().to(self.device)
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        self.criterion_regr = nn.MSELoss().to(self.device)

        # Pack models and optimizers in a dict - necessary for checkpointing
        self.models_dict = {
            'graph_cnn': self.graph_cnn,
            'smpl_param_regressor': self.smpl_param_regressor
        }
        self.optimizers_dict = {'optimizer': self.optimizer}

        # Renderer for visualization
        self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

        # LSP indices from full list of keypoints
        self.to_lsp = list(range(14))

        # Optionally start training from a pretrained checkpoint
        # Note that this is different from resuming training
        # For the latter use --resume
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)
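
A hedged sketch of how models_dict and optimizers_dict could feed a checkpoint saver; the actual BaseTrainer saving logic is not shown in this snippet, and the helper name and file path are hypothetical.

import torch

def save_checkpoint(trainer, path='checkpoint.pt'):
    # Collect state dicts from the same dictionaries packed in init_fn above.
    state = {name: m.state_dict() for name, m in trainer.models_dict.items()}
    state.update({name: o.state_dict() for name, o in trainer.optimizers_dict.items()})
    torch.save(state, path)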
Example No. 9
def bbox_from_json(bbox_file):
    """Get center and scale of bounding box from bounding box annotations.
    The expected format is [top_left(x), top_left(y), width, height].
    """
    with open(bbox_file, 'r') as f:
        bbox = np.array(json.load(f)['bbox']).astype(np.float32)
    ul_corner = bbox[:2]
    center = ul_corner + 0.5 * bbox[2:]
    # Load pretrained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)

    # Load SMPL model
    smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1,
                create_transl=False).to(device)
    model.eval()

    # Setup renderer for visualization
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH,
                        img_res=constants.IMG_RES,
                        faces=smpl.faces)

    # Preprocess input image and generate predictions
    img, norm_img = process_image(args.img,
                                  args.bbox,
                                  args.openpose,
                                  input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        pred_output = smpl(betas=pred_betas,
                           body_pose=pred_rotmat[:, 1:],
                           global_orient=pred_rotmat[:, 0].unsqueeze(1),
                           pose2rot=False)
        pred_vertices = pred_output.vertices

    # Calculate camera parameters for rendering
    camera_translation = torch.stack([
        pred_camera[:, 1], pred_camera[:, 2], 2 * constants.FOCAL_LENGTH /
        (constants.IMG_RES * pred_camera[:, 0] + 1e-9)
    ],
                                     dim=-1)
    camera_translation = camera_translation[0].cpu().numpy()
    pred_vertices = pred_vertices[0].cpu().numpy()
    img = img.permute(1, 2, 0).cpu().numpy()

    # make sure the bounding box is rectangular
    width = max(bbox[2], bbox[3])
    scale = width / 200.0
    return center, scale
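
A worked sketch of the center/scale computation above, using a made-up annotation.

import numpy as np

bbox = np.array([100., 50., 200., 400.])   # [top_left_x, top_left_y, width, height]
center = bbox[:2] + 0.5 * bbox[2:]         # -> array([200., 250.])
scale = max(bbox[2], bbox[3]) / 200.0      # -> 2.0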
Example No. 10
    def init_fn(self):
        self.options.img_res = cfg.DANET.INIMG_SIZE
        self.options.heatmap_size = cfg.DANET.HEATMAP_SIZE
        self.train_ds = MixedDataset(self.options,
                                     ignore_3d=self.options.ignore_3d,
                                     is_train=True)

        self.model = DaNet(options=self.options,
                           smpl_mean_params=path_config.SMPL_MEAN_PARAMS).to(
                               self.device)
        self.smpl = self.model.iuv2smpl.smpl

        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=cfg.SOLVER.BASE_LR,
                                          weight_decay=0)

        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH

        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        # Load dictionary of fits of SPIN
        self.fits_dict = FitsDict(self.options, self.train_ds)

        # Create renderer
        try:
            self.renderer = Renderer(focal_length=self.focal_length,
                                     img_res=self.options.img_res,
                                     faces=self.smpl.faces)
        except Exception:
            print('Warning: no renderer for visualization.')
            self.renderer = None

        self.decay_steps_ind = 1
Example No. 11
    def init_fn(self):
        self.train_ds = MixedDataset(self.options,
                                     ignore_3d=self.options.ignore_3d,
                                     is_train=True)

        self.model = hmr(config.SMPL_MEAN_PARAMS,
                         pretrained=True).to(self.device)

        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.options.lr)
        self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
            self.optimizer, gamma=0.95)

        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=self.options.batch_size,
                         create_transl=False).to(self.device)

        # consistency loss
        self.criterion_consistency_contrastive = NTXent(
            tau=self.options.tau, kernel=self.options.kernel).to(self.device)
        self.criterion_consistency_mse = nn.MSELoss().to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH

        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        # Create renderer
        self.renderer = Renderer(focal_length=self.focal_length,
                                 img_res=self.options.img_res,
                                 faces=self.smpl.faces)

        # Create input image flag
        self.input_img = self.options.input_img

        # initialize queue
        self.feat_queue = FeatQueue(max_queue_size=self.options.max_queue_size)
Example No. 12
class render_utils(object):
    def __init__(self, config, device='cuda'):
        self.batch_size = config.batch_size
        self.image_size = config.image_size
        self.config = config
        self.device = device
        #
        self.flame = FLAME(self.config).to(self.device)
        self.flametex = FLAMETex(self.config).to(self.device)

        self._setup_renderer()
        self.flame_faces = self.render.faces

    def _setup_renderer(self):
        mesh_file = './data/head_template_mesh.obj'
        self.render = Renderer(self.image_size,
                               obj_filename=mesh_file).to(self.device)

    def render_tex_and_normal(self, shapecode, expcode, posecode, texcode,
                              lightcode, cam):
        verts, _, _ = self.flame(shape_params=shapecode,
                                 expression_params=expcode,
                                 pose_params=posecode)
        trans_verts = util.batch_orth_proj(verts, cam)
        trans_verts[:, :, 1:] = -trans_verts[:, :, 1:]

        albedos = self.flametex(texcode)
        rendering_results = self.render(verts,
                                        trans_verts,
                                        albedos,
                                        lights=lightcode)
        textured_images, normals = rendering_results[
            'images'], rendering_results['normals']
        normal_images = self.render.render_normal(trans_verts, normals)
        return textured_images, normal_images

    def get_flame_faces(self):
        return self.flame_faces
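
A shape sketch for render_tex_and_normal; the FLAME parameter sizes below (100 shape, 50 expression, 6 pose, 50 texture) are common defaults and are assumptions here, as is the 3-value orthographic camera.

import torch

ru = render_utils(config, device='cuda')           # config as used by __init__ above
B = config.batch_size
shapecode = torch.zeros(B, 100, device='cuda')     # assumed FLAME shape dimension
expcode = torch.zeros(B, 50, device='cuda')
posecode = torch.zeros(B, 6, device='cuda')
texcode = torch.zeros(B, 50, device='cuda')
lightcode = torch.zeros(B, 9, 3, device='cuda')    # spherical-harmonic lighting, as in Example No. 17
cam = torch.zeros(B, 3, device='cuda')
cam[:, 0] = 5.0                                    # scale term, mirroring Example No. 17
textured_images, normal_images = ru.render_tex_and_normal(
    shapecode, expcode, posecode, texcode, lightcode, cam)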
Example No. 13
    # Load model
    mesh = Mesh(device=device)
    # Our pretrained networks have 5 residual blocks with 256 channels.
    # You might want to change this if you use a different architecture.
    model = CMR(mesh,
                5,
                256,
                pretrained_checkpoint=args.checkpoint,
                device=device)

    model.to(device)
    model.eval()

    # Setup renderer for visualization
    renderer = Renderer()

    # Preprocess input image and generate predictions
    img, norm_img = process_image(args.img,
                                  args.bbox,
                                  args.openpose,
                                  input_res=cfg.INPUT_RES)
    norm_img = norm_img.to(device)
    with torch.no_grad():
        pred_vertices, pred_vertices_smpl, pred_camera, _, _ = model(norm_img)

    # Calculate camera parameters for rendering
    camera_translation = torch.stack([
        pred_camera[:, 1], pred_camera[:, 2], 2 * cfg.FOCAL_LENGTH /
        (cfg.INPUT_RES * pred_camera[:, 0] + 1e-9)
    ],
Example No. 14
    def _setup_renderer(self):
        mesh_file = './data/head_template_mesh.obj'
        self.render = Renderer(self.image_size,
                               obj_filename=mesh_file).to(self.device)
Example No. 15
# import matplotlib
# matplotlib.use('MACOSX')
import matplotlib.pyplot as plt
from smplx import SMPL

from utils.renderer import Renderer
from utils.cam_utils import perspective_project_torch
from data.ssp3d_dataset import SSP3DDataset
import config

# SMPL models in torch
smpl_male = SMPL(config.SMPL_MODEL_DIR, batch_size=1, gender='male')
smpl_female = SMPL(config.SMPL_MODEL_DIR, batch_size=1, gender='female')

# Pyrender renderer
renderer = Renderer(faces=smpl_male.faces, img_res=512)

# SSP-3D dataset class
ssp3d_dataset = SSP3DDataset(config.SSP_3D_PATH)

indices_to_plot = [11, 60, 199]  # Visualising 3 examples from SSP-3D

for i in indices_to_plot:
    data = ssp3d_dataset[i]

    fname = data['fname']
    image = data['image']
    cropped_image = data['cropped_image']
    silhouette = data['silhouette']
    joints2D = data['joints2D']
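    # Hedged visualization sketch (not part of the original script): a quick look at the
    # sample using the matplotlib import above. It assumes silhouette is a 2D mask and
    # joints2D an (N, 2+) array of pixel coordinates on the full image.
    plt.figure(fname)
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.scatter(joints2D[:, 0], joints2D[:, 1], s=8, c='r')
    plt.subplot(1, 2, 2)
    plt.imshow(silhouette, cmap='gray')
    plt.show()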
Example No. 16
    def _setup_renderer(self):
        self.render = Renderer(cfg.image_size, obj_filename=cfg.mesh_file).to(self.device)
Example No. 17
class PhotometricFitting(object):
    def __init__(self, device='cuda'):
        # self.batch_size = cfg.batch_size
        # self.image_size = cfg.image_size
        # self.cropped_size = cfg.cropped_size
        self.config = cfg
        self.device = device
        self.flame = FLAME(self.config).to(self.device)
        self.flametex = FLAMETex(self.config).to(self.device)

        self._setup_renderer()

    def _setup_renderer(self):
        self.render = Renderer(cfg.image_size, obj_filename=cfg.mesh_file).to(self.device)

    def optimize(self, images, landmarks, image_masks, video_writer):
        bz = images.shape[0]
        shape = nn.Parameter(torch.zeros(bz, cfg.shape_params).float().to(self.device))
        tex = nn.Parameter(torch.zeros(bz, cfg.tex_params).float().to(self.device))
        exp = nn.Parameter(torch.zeros(bz, cfg.expression_params).float().to(self.device))
        pose = nn.Parameter(torch.zeros(bz, cfg.pose_params).float().to(self.device))
        cam = torch.zeros(bz, cfg.camera_params)
        cam[:, 0] = 5.
        cam = nn.Parameter(cam.float().to(self.device))
        lights = nn.Parameter(torch.zeros(bz, 9, 3).float().to(self.device))
        e_opt = torch.optim.Adam(
            [shape, exp, pose, cam, tex, lights],
            lr=cfg.e_lr,
            weight_decay=cfg.e_wd
        )

        gt_landmark = landmarks

        # non-rigid fitting of all the parameters with 68 face landmarks, photometric loss and regularization terms.
        all_train_iter = 0
        all_train_iters = []
        photometric_loss = []
        for k in range(cfg.max_iter):
            losses = {}
            vertices, landmarks2d, landmarks3d = self.flame(shape_params=shape, expression_params=exp, pose_params=pose)
            trans_vertices = util.batch_orth_proj(vertices, cam)
            trans_vertices[..., 1:] = - trans_vertices[..., 1:]
            landmarks2d = util.batch_orth_proj(landmarks2d, cam)
            landmarks2d[..., 1:] = - landmarks2d[..., 1:]
            landmarks3d = util.batch_orth_proj(landmarks3d, cam)
            landmarks3d[..., 1:] = - landmarks3d[..., 1:]
            losses['landmark'] = util.l2_distance(landmarks2d[:, :, :2], gt_landmark[:, :, :2])

            # render
            albedos = self.flametex(tex) / 255.
            ops = self.render(vertices, trans_vertices, albedos, lights)
            predicted_images = ops['images']
            # losses['photometric_texture'] = (image_masks * (ops['images'] - images).abs()).mean() * config.w_pho
            losses['photometric_texture'] = F.smooth_l1_loss(image_masks * ops['images'],
                                                             image_masks * images) * cfg.w_pho

            all_loss = 0.
            for key in losses.keys():
                all_loss = all_loss + losses[key]
            losses['all_loss'] = all_loss
            e_opt.zero_grad()
            all_loss.backward()
            e_opt.step()

            loss_info = '----iter: {}, time: {}\n'.format(k, datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))
            for key in losses.keys():
                loss_info = loss_info + '{}: {}, '.format(key, float(losses[key]))

            if k % 10 == 0:
                all_train_iter += 10
                all_train_iters.append(all_train_iter)
                photometric_loss.append(float(losses['photometric_texture']))
                print(loss_info)

                grids = {}
                visind = range(bz)  # [0]
                grids['images'] = torchvision.utils.make_grid(images[visind]).detach().cpu()
                grids['landmarks_gt'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind], landmarks[visind]))
                grids['landmarks2d'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind], landmarks2d[visind]))
                grids['landmarks3d'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind], landmarks3d[visind]))
                grids['albedoimage'] = torchvision.utils.make_grid(
                    (ops['albedo_images'])[visind].detach().cpu())
                grids['render'] = torchvision.utils.make_grid(predicted_images[visind].detach().float().cpu())
                shape_images = self.render.render_shape(vertices, trans_vertices, images)
                grids['shape'] = torchvision.utils.make_grid(
                    F.interpolate(shape_images[visind], [224, 224])).detach().float().cpu()

                # grids['tex'] = torchvision.utils.make_grid(F.interpolate(albedos[visind], [224, 224])).detach().cpu()
                grid = torch.cat(list(grids.values()), 1)
                grid_image = (grid.numpy().transpose(1, 2, 0).copy() * 255)[:, :, [2, 1, 0]]
                grid_image = np.minimum(np.maximum(grid_image, 0), 255).astype(np.uint8)
                video_writer.write(grid_image)

        single_params = {
            'shape': shape.detach().cpu().numpy(),
            'exp': exp.detach().cpu().numpy(),
            'pose': pose.detach().cpu().numpy(),
            'cam': cam.detach().cpu().numpy(),
            'verts': trans_vertices.detach().cpu().numpy(),
            'albedos': albedos.detach().cpu().numpy(),
            'tex': tex.detach().cpu().numpy(),
            'lit': lights.detach().cpu().numpy()
        }
        util.draw_train_process("training", all_train_iters, photometric_loss, 'photometric loss')
        # np.save("./test_results/model.npy", single_params)
        return single_params

    def run(self, img, net, rect_detect, landmark_detect, rect_thresh, save_name, video_writer, savefolder):
        # The implementation is potentially able to optimize with images(batch_size>1),
        # here we show the example with a single image fitting
        images = []
        landmarks = []
        image_masks = []
        bbox = rect_detect.extract(img, rect_thresh)
        if len(bbox) > 0:
            crop_image, new_bbox = util.crop_img(img, bbox[0], cfg.cropped_size)

            # input landmark
            resize_img, landmark = landmark_detect.extract([crop_image, [new_bbox]])
            landmark = landmark[0]
            landmark[:, 0] = landmark[:, 0] / float(resize_img.shape[1]) * 2 - 1
            landmark[:, 1] = landmark[:, 1] / float(resize_img.shape[0]) * 2 - 1
            landmarks.append(torch.from_numpy(landmark)[None, :, :].double().to(self.device))
            landmarks = torch.cat(landmarks, dim=0)

            # input image
            image = cv2.resize(crop_image, (cfg.cropped_size, cfg.cropped_size)).astype(np.float32) / 255.
            image = image[:, :, [2, 1, 0]].transpose(2, 0, 1)
            images.append(torch.from_numpy(image[None, :, :, :]).double().to(self.device))
            images = torch.cat(images, dim=0)
            images = F.interpolate(images, [cfg.image_size, cfg.image_size])

            # face segment mask
            image_mask = util.face_seg(crop_image, net, cfg.cropped_size)
            image_masks.append(torch.from_numpy(image_mask).double().to(cfg.device))
            image_masks = torch.cat(image_masks, dim=0)
            image_masks = F.interpolate(image_masks, [cfg.image_size, cfg.image_size])

            # check folder exist or not
            util.check_mkdir(savefolder)
            save_file = os.path.join(savefolder, save_name)

            # optimize
            single_params = self.optimize(images, landmarks, image_masks, video_writer)
            self.render.save_obj(filename=save_file,
                                 vertices=torch.from_numpy(single_params['verts'][0]).to(self.device),
                                 textures=torch.from_numpy(single_params['albedos'][0]).to(self.device)
                                 )
            np.save(save_file, single_params)
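
An input-shape sketch for optimize(), derived from the preprocessing in run() above (inputs are cast to double, images scaled to [0, 1], landmarks normalized to [-1, 1]); the concrete image size and single-channel mask are assumptions.

import torch

images = torch.rand(1, 3, 256, 256).double().cuda()        # RGB in [0, 1], resized to cfg.image_size
landmarks = torch.rand(1, 68, 2).double().cuda() * 2 - 1   # 68 landmarks in [-1, 1]
image_masks = torch.ones(1, 1, 256, 256).double().cuda()   # face-region mask (channel count assumed)
# fitting = PhotometricFitting(); fitting.optimize(images, landmarks, image_masks, video_writer)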
Example No. 18
class UrbanEnv(CarlaGym):
    """docstring for ClassName"""
    def __init__(self):
        super(UrbanEnv, self).__init__()

        # Initialize environment parameters
        self.town = carla_config.town
        self.state_y = carla_config.grid_height
        self.state_x = carla_config.grid_width
        self.channel = carla_config.features

        # Sensors
        self.rgb_sensor = None
        self.semantic_sensor = None

        # Planners
        self.planner = None

        # States and actions
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(carla_config.grid_height,
                                                   carla_config.grid_width,
                                                   carla_config.features),
                                            dtype=np.uint8)
        self.action_space = spaces.Discrete(carla_config.N_DISCRETE_ACTIONS)

        self.ego_vehicle = None
        self.current_speed = 0.0

        # Rendering related
        self.renderer = None
        self.is_render_enabled = carla_config.render

        if self.is_render_enabled:
            self.renderer = Renderer()
            self.init_renderer()

    def step(self, action=None, sp=25):

        self._take_action(action, sp)

        self.tick()

        state = self._get_observation()

        reward, done = self._get_reward()

        if self.is_render_enabled:
            if self.rgb_image is not None:
                img = self.get_rendered_image()
                self.renderer.render_image(img)

        return state, reward, done

    def _get_observation(self):
        tensor = np.zeros([self.state_y, self.state_x, self.channel])

        # Fill ego vehicle information
        ev_trans = self.ego_vehicle.get_transform()

        for i in range(-1, 2):
            for j in range(0, 2):
                x_discrete, status = self.get_index(i, carla_config.x_min,
                                                    carla_config.x_max,
                                                    carla_config.x_size)
                y_discrete, status = self.get_index(j, carla_config.y_min,
                                                    carla_config.y_max,
                                                    carla_config.y_size)

                x_discrete = np.argmax(x_discrete)
                y_discrete = np.argmax(y_discrete)

                tensor[x_discrete, y_discrete, :] = [0.01]

        # Fill pedestrian information
        peds = self.world.get_actors().filter('walker.pedestrian.*')
        for p in peds:
            p_trans = p.get_transform()
            #print(p_trans.rotation.yaw - ev_trans.rotation.yaw)

            p_xyz = np.array(
                [p_trans.location.x, p_trans.location.y, p_trans.location.z])
            ev_xyz = np.array([
                ev_trans.location.x, ev_trans.location.y, ev_trans.location.z
            ])
            ped_loc = p_xyz - ev_xyz

            pitch = math.radians(ev_trans.rotation.pitch)
            roll = math.radians(ev_trans.rotation.roll)
            yaw = math.radians(ev_trans.rotation.yaw)
            R = transforms3d.euler.euler2mat(roll, pitch, yaw).T
            ped_loc_relative = np.dot(R, ped_loc)

            x_discrete, status_x = self.get_index(ped_loc_relative[0],
                                                  carla_config.x_min,
                                                  carla_config.x_max,
                                                  carla_config.x_size)
            y_discrete, status_y = self.get_index(ped_loc_relative[1],
                                                  carla_config.y_min,
                                                  carla_config.y_max,
                                                  carla_config.y_size)

            if status_x and status_y:
                x_discrete = np.argmax(x_discrete)
                y_discrete = np.argmax(y_discrete)

                # Get pedestrian id
                p_id = int(p.attributes['role_name'])

                # Get pedestrian relative orientation
                p_heading = p_trans.rotation.yaw - ev_trans.rotation.yaw

                # Get pedestrian lane type
                ped_lane = None
                waypoint = self.world.get_map().get_waypoint(
                    p_trans.location,
                    project_to_road=True,
                    lane_type=(carla.LaneType.Driving
                               | carla.LaneType.Sidewalk))
                if waypoint.lane_type == carla.LaneType.Driving:
                    ped_lane = 1
                elif waypoint.lane_type == carla.LaneType.Sidewalk or waypoint.lane_type == carla.LaneType.Shoulder:
                    ped_lane = 2

                if within_crosswalk(p_trans.location.x, p_trans.location.y):
                    ped_lane = 3

                tensor[x_discrete, y_discrete, :] = [
                    self.normalize_data(p_id, 0.0, carla_config.num_of_ped),
                    self.normalize_data(p_heading, 0.0, 360.0),
                    self.normalize_data(ped_lane, 0.0, 3)
                ]
                # ped id, ped relative orientation and region occupied.

                #print(tensor[x_discrete, y_discrete, :])

        return tensor

    def _get_reward(self):

        done = False

        total_reward = d_reward = nc_reward = c_reward = 0

        # Reward for speed
        # ev_speed = get_speed(self.ego_vehicle)

        # if ev_speed > 0.0 and ev_speed <=50:
        #     d_reward = (10.0 - abs(10.0 - ev_speed))/10.0

        # elif ev_speed > 50:
        #     d_reward = -5

        # elif ev_speed <= 0.0:
        #     d_reward = -2

        ## Reward(penalty) for collision
        pedestrian_list = self.world.get_actors().filter('walker.pedestrian.*')
        collision, near_collision, pedestrian = self.is_collision(
            pedestrian_list)

        if collision is True:
            #print('collision')
            c_reward = -10
            done = True

        elif near_collision is True:
            #print('near collision')
            nc_reward = -5

        # Check if goal reached
        if self.planner.done():
            done = True

        total_reward = d_reward + nc_reward + c_reward

        return total_reward, done

    def _take_action(self, action, sp):

        mps_kmph_conversion = 3.6

        target_speed = 0.0

        # accelerate
        if action == 0:
            current_speed = get_speed(self.ego_vehicle) / mps_kmph_conversion
            desired_speed = current_speed + 0.2
            desired_speed *= 3.6
            self.current_speed = desired_speed
            self.planner.local_planner.set_speed(desired_speed)
            control = self.planner.run_step()
            control.brake = 0.0
            self.ego_vehicle.apply_control(control)

        # decelerate
        elif action == 1:
            current_speed = get_speed(self.ego_vehicle) / mps_kmph_conversion
            desired_speed = current_speed - 0.2
            desired_speed *= 3.6
            self.current_speed = desired_speed
            self.planner.local_planner.set_speed(desired_speed)
            control = self.planner.run_step()
            control.brake = 0.0
            self.ego_vehicle.apply_control(control)

        elif action == 2:  # emergency stop
            self.emergency_stop()

        # speed tracking
        elif action == 3:
            self.planner.local_planner.set_speed(self.current_speed)
            control = self.planner.run_step()
            control.brake = 0.0
            self.ego_vehicle.apply_control(control)

    def emergency_stop(self):
        control = carla.VehicleControl()
        control.steer = 0.0
        control.throttle = 0.0
        control.brake = 1.0
        control.hand_brake = False
        self.ego_vehicle.apply_control(control)

    def reset(self, client_only=False):

        if self.server:
            self.close()

        self.setup_client_and_server(display=carla_config.display,
                                     rendering=carla_config.render)

        self.initialize_ego_vehicle()

        self.apply_settings(fps=1.0, no_rendering=not carla_config.render)

        self.world.get_map().generate_waypoints(1.0)

        # Run some initial steps
        for i in range(100):
            self.step(2)

        state = self._get_observation()

        return state

    def initialize_ego_vehicle(self):
        # Spawn ego vehicle
        sp = carla.Transform(
            carla.Location(x=carla_config.sp_x,
                           y=carla_config.sp_y,
                           z=carla_config.sp_z),
            carla.Rotation(yaw=carla_config.sp_yaw))
        bp = random.choice(self.world.get_blueprint_library().filter(
            carla_config.ev_bp))
        bp.set_attribute('role_name', carla_config.ev_name)
        self.ego_vehicle = self.spawn_ego_vehicle(bp, sp)

        # Add sensors to ego vehicle
        # RGB sensor
        if carla_config.rgb_sensor:
            rgb_bp = self.world.get_blueprint_library().find(
                'sensor.camera.rgb')
            rgb_bp.set_attribute('image_size_x', carla_config.rgb_size_x)
            rgb_bp.set_attribute('image_size_y', carla_config.rgb_size_y)
            rgb_bp.set_attribute('fov', carla_config.rgb_fov)
            transform = carla.Transform(
                carla.Location(x=carla_config.rgb_loc_x,
                               z=carla_config.rgb_loc_z))
            self.rgb_sensor = self.world.spawn_actor(
                rgb_bp, transform, attach_to=self.ego_vehicle)
            self.rgb_sensor.listen(self.rgb_sensor_callback)
            self.rgb_image = None

        # Semantic sensor
        if carla_config.sem_sensor:
            sem_bp = self.world.get_blueprint_library().find(
                'sensor.camera.semantic_segmentation')
            sem_bp.set_attribute('image_size_x', carla_config.rgb_size_x)
            sem_bp.set_attribute('image_size_y', carla_config.rgb_size_y)
            sem_bp.set_attribute('fov', carla_config.rgb_fov)
            transform = carla.Transform(
                carla.Location(x=carla_config.rgb_loc_x,
                               z=carla_config.rgb_loc_z))
            self.semantic_sensor = self.world.spawn_actor(
                sem_bp, transform, attach_to=self.ego_vehicle)
            self.semantic_sensor.listen(self.semantic_sensor_callback)
            self.semantic_image = None

        # Initialize the planner
        self.planner = Planner()
        self.planner.initialize(self.ego_vehicle)
        self.planner.set_destination(
            (carla_config.ev_goal_x, carla_config.ev_goal_y,
             carla_config.ev_goal_z))

    def spawn_ego_vehicle(self, bp=None, sp=None):

        if not bp:
            bp = random.choice(
                self.world.get_blueprint_library().filter('vehicle.*'))

        if not sp:
            sp = random.choice(self.world.get_map().get_spawn_points())

        return self.world.spawn_actor(bp, sp)

    def rgb_sensor_callback(self, image):
        #image.convert(cc.Raw)
        array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8"))
        array = np.reshape(array, (image.height, image.width, 4))
        array = array[:, :, :3]
        self.rgb_image = array

    def semantic_sensor_callback(self, image):
        image.convert(cc.CityScapesPalette)
        array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8"))
        array = np.reshape(array, (image.height, image.width, 4))
        array = array[:, :, :3]
        array = array[:, :, ::-1]
        self.semantic_image = array

    def close(self):
        #if carla_config.rgb_sensor:
        if self.rgb_sensor is not None:
            self.rgb_sensor.destroy()
        #if carla_config.sem_sensor:
        if self.semantic_sensor is not None:
            self.semantic_sensor.destroy()
        # if self.ego_vehicle is not None:
        #     self.ego_vehicle.destroy()

        if self.renderer is not None:
            self.renderer.close()
        self.kill_processes()

    def init_renderer(self):
        self.no_of_cam = 0
        if carla_config.rgb_sensor: self.no_of_cam += 1
        if carla_config.sem_sensor: self.no_of_cam += 1

        self.renderer.create_screen(carla_config.screen_x,
                                    carla_config.screen_y * self.no_of_cam)

    def render(self):
        if self.renderer is None or not (self.is_render_enabled):
            return

        img = self.get_rendered_image()
        self.renderer.render_image(img)

    def get_rendered_image(self):
        temp = []
        if carla_config.rgb_sensor: temp.append(self.rgb_image)
        if carla_config.sem_sensor: temp.append(self.semantic_image)

        return np.vstack(temp)

    def get_index(self, val, start, stop, num):

        grids = np.linspace(start, stop, num)
        features = np.zeros(num)

        #Check extremes
        if val <= grids[0] or val > grids[-1]:
            return features, False

        for i in range(len(grids) - 1):
            if val >= grids[i] and val < grids[i + 1]:
                features[i] = 1

        return features, True

    def normalize_data(self, data, min_val, max_val):
        return (data - min_val) / (max_val - min_val)

    def is_collision(self, entity_list):

        ego_vehicle_location = self.ego_vehicle.get_transform().location
        ego_vehicle_waypoint = self.world.get_map().get_waypoint(
            ego_vehicle_location)

        for target in entity_list:

            # if the object is not in our lane it's not an obstacle
            target_waypoint = self.world.get_map().get_waypoint(
                target.get_location(),
                project_to_road=True,
                lane_type=(carla.LaneType.Driving | carla.LaneType.Sidewalk))

            # if target_waypoint.road_id == ego_vehicle_waypoint.road_id and \
            #         target_waypoint.lane_id == ego_vehicle_waypoint.lane_id:
            #target_waypoint.lane_type == ego_vehicle_waypoint.lane_type:
            if target_waypoint.lane_type == carla.LaneType.Driving and target_waypoint.lane_id == ego_vehicle_waypoint.lane_id:
                if is_within_distance_ahead(target.get_transform(),
                                            self.ego_vehicle.get_transform(),
                                            4.0):
                    #if self.distance(self.ego_vehicle.get_transform(), target.get_transform()) < 10.0:
                    return (True, True, target)

            # if target_waypoint.road_id == ego_vehicle_waypoint.road_id and \
            #         target_waypoint.lane_type == ego_vehicle_waypoint.lane_type:

                elif is_within_distance_ahead(target.get_transform(),
                                              self.ego_vehicle.get_transform(),
                                              8.0):
                    return (False, True, target)

        return (False, False, None)

    def distance(self, source_transform, destination_transform):
        dx = source_transform.location.x - destination_transform.location.x
        dy = source_transform.location.y - destination_transform.location.y

        return math.sqrt(dx * dx + dy * dy)

    def get_ego_speed(self):
        return get_speed(self.ego_vehicle)
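
A minimal rollout sketch for the environment above; it assumes a running CARLA server and the carla_config settings referenced throughout the class.

env = UrbanEnv()
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()       # random policy, for illustration only
    state, reward, done = env.step(action)
    env.render()
env.close()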
Example No. 19
if __name__ == '__main__':
    args = parser.parse_args()
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load trained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.trained_model)
    model.load_state_dict(checkpoint['model'], strict=False)
    smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1,
                create_transl=False).to(device)
    model.eval()
    # Generate rendered image
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH,
                        img_res=constants.IMG_RES,
                        faces=smpl.faces)
    # Process the image and predict the parameters
    img, norm_img = process_image(args.test_image,
                                  args.bbox,
                                  input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        pred_output = smpl(betas=pred_betas,
                           body_pose=pred_rotmat[:, 1:],
                           global_orient=pred_rotmat[:, 0].unsqueeze(1),
                           pose2rot=False)
        pred_vertices = pred_output.vertices
    camera_translation = torch.stack([
        pred_camera[:, 1], pred_camera[:, 2], 2 * constants.FOCAL_LENGTH /
        (constants.IMG_RES * pred_camera[:, 0] + 1e-9)
Example No. 20
class Trainer(BaseTrainer):
    def init_fn(self):
        self.train_ds = MixedDataset(self.options,
                                     ignore_3d=self.options.ignore_3d,
                                     is_train=True)

        self.model = hmr(config.SMPL_MEAN_PARAMS,
                         pretrained=True).to(self.device)

        # Switch the optimizer
        if self.options.optimizer == 'adam':
            print('Using adam')
            self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                              lr=self.options.lr,
                                              weight_decay=0)
        elif self.options.optimizer == 'sgd':
            print('Using sgd')
            self.optimizer = torch.optim.SGD(params=self.model.parameters(),
                                             lr=self.options.lr,
                                             weight_decay=0)
        elif self.options.optimizer == 'momentum':
            print('Using momentum')
            self.optimizer = torch.optim.SGD(params=self.model.parameters(),
                                             lr=self.options.lr,
                                             momentum=self.options.momentum,
                                             weight_decay=0)
        else:
            print(self.options.optimizer + ' not found')
            raise ValueError('Unsupported optimizer: ' + self.options.optimizer)
        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=self.options.batch_size,
                         create_transl=False).to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH

        # Initialize SMPLify fitting module
        self.smplify = SMPLify(step_size=1e-2,
                               batch_size=self.options.batch_size,
                               num_iters=self.options.num_smplify_iters,
                               focal_length=self.focal_length)
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        # Load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)

        # Create renderer
        self.renderer = Renderer(focal_length=self.focal_length,
                                 img_res=self.options.img_res,
                                 faces=self.smpl.faces)

    def finalize(self):
        self.fits_dict.save()

    def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d,
                      openpose_weight, gt_weight):
        """ Compute 2D reprojection loss on the keypoints.
        The loss is weighted by the confidence.
        The available keypoints are different for each dataset.
        """
        conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
        conf[:, :25] *= openpose_weight
        conf[:, 25:] *= gt_weight
        loss = (conf * self.criterion_keypoints(
            pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean()
        return loss

    def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d,
                         has_pose_3d):
        """Compute 3D keypoint loss for the examples that 3D keypoint annotations are available.
        The loss is weighted by the confidence.
        """
        pred_keypoints_3d = pred_keypoints_3d[:, 25:, :]
        conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
        gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone()
        gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1]
        conf = conf[has_pose_3d == 1]
        pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1]
        if len(gt_keypoints_3d) > 0:
            gt_pelvis = (gt_keypoints_3d[:, 2, :] +
                         gt_keypoints_3d[:, 3, :]) / 2
            gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :]
            pred_pelvis = (pred_keypoints_3d[:, 2, :] +
                           pred_keypoints_3d[:, 3, :]) / 2
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :]
            return (conf * self.criterion_keypoints(pred_keypoints_3d,
                                                    gt_keypoints_3d)).mean()
        else:
            return torch.FloatTensor(1).fill_(0.).to(self.device)

    def shape_loss(self, pred_vertices, gt_vertices, has_smpl):
        """Compute per-vertex loss on the shape for the examples that SMPL annotations are available."""
        pred_vertices_with_shape = pred_vertices[has_smpl == 1]
        gt_vertices_with_shape = gt_vertices[has_smpl == 1]
        if len(gt_vertices_with_shape) > 0:
            return self.criterion_shape(pred_vertices_with_shape,
                                        gt_vertices_with_shape)
        else:
            return torch.FloatTensor(1).fill_(0.).to(self.device)

    def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas,
                    has_smpl):
        pred_rotmat_valid = pred_rotmat[has_smpl == 1]
        gt_rotmat_valid = batch_rodrigues(gt_pose.view(-1, 3)).view(
            -1, 24, 3, 3)[has_smpl == 1]
        pred_betas_valid = pred_betas[has_smpl == 1]
        gt_betas_valid = gt_betas[has_smpl == 1]
        if len(pred_rotmat_valid) > 0:
            loss_regr_pose = self.criterion_regr(pred_rotmat_valid,
                                                 gt_rotmat_valid)
            loss_regr_betas = self.criterion_regr(pred_betas_valid,
                                                  gt_betas_valid)
        else:
            loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device)
            loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device)
        return loss_regr_pose, loss_regr_betas

    def train_step(self, input_batch):
        self.model.train()

        # Get data from the batch
        images = input_batch['img']  # input image
        gt_keypoints_2d = input_batch['keypoints']  # 2D keypoints
        gt_pose = input_batch['pose']  # SMPL pose parameters
        gt_betas = input_batch['betas']  # SMPL beta parameters
        gt_joints = input_batch['pose_3d']  # 3D pose
        has_smpl = input_batch['has_smpl'].byte()  # flag that indicates whether SMPL parameters are valid
        has_pose_3d = input_batch['has_pose_3d'].byte()  # flag that indicates whether 3D pose is valid
        is_flipped = input_batch['is_flipped']  # flag that indicates whether image was flipped during data augmentation
        rot_angle = input_batch['rot_angle']  # rotation angle used for data augmentation
        dataset_name = input_batch['dataset_name']  # name of the dataset the image comes from
        indices = input_batch['sample_index']  # index of example inside its dataset
        batch_size = images.shape[0]

        # Get GT vertices and model joints
        # Note that gt_model_joints is different from gt_joints as it comes from SMPL
        gt_out = self.smpl(betas=gt_betas,
                           body_pose=gt_pose[:, 3:],
                           global_orient=gt_pose[:, :3])
        gt_model_joints = gt_out.joints
        gt_vertices = gt_out.vertices

        # Get current best fits from the dictionary
        opt_pose, opt_betas = self.fits_dict[(dataset_name, indices.cpu(),
                                              rot_angle.cpu(),
                                              is_flipped.cpu())]
        opt_pose = opt_pose.to(self.device)
        opt_betas = opt_betas.to(self.device)
        opt_output = self.smpl(betas=opt_betas,
                               body_pose=opt_pose[:, 3:],
                               global_orient=opt_pose[:, :3])
        opt_vertices = opt_output.vertices
        if opt_vertices.shape != (self.options.batch_size, 6890, 3):
            opt_vertices = torch.zeros_like(opt_vertices, device=self.device)
        opt_joints = opt_output.joints

        # De-normalize 2D keypoints from [-1,1] to pixel space
        gt_keypoints_2d_orig = gt_keypoints_2d.clone()
        gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * (
            gt_keypoints_2d_orig[:, :, :-1] + 1)

        # Estimate camera translation given the model joints and 2D keypoints
        # by minimizing a weighted least squares loss
        gt_cam_t = estimate_translation(gt_model_joints,
                                        gt_keypoints_2d_orig,
                                        focal_length=self.focal_length,
                                        img_size=self.options.img_res)

        opt_cam_t = estimate_translation(opt_joints,
                                         gt_keypoints_2d_orig,
                                         focal_length=self.focal_length,
                                         img_size=self.options.img_res)

        opt_joint_loss = self.smplify.get_fitting_loss(
            opt_pose, opt_betas, opt_cam_t, 0.5 * self.options.img_res *
            torch.ones(batch_size, 2, device=self.device),
            gt_keypoints_2d_orig).mean(dim=-1)

        # Feed images in the network to predict camera and SMPL parameters
        pred_rotmat, pred_betas, pred_camera = self.model(images)

        pred_output = self.smpl(betas=pred_betas,
                                body_pose=pred_rotmat[:, 1:],
                                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                pose2rot=False)
        pred_vertices = pred_output.vertices
        if pred_vertices.shape != (self.options.batch_size, 6890, 3):
            pred_vertices = torch.zeros_like(pred_vertices, device=self.device)

        pred_joints = pred_output.joints

        # Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size
        # This camera translation can be used in a full perspective projection
        pred_cam_t = torch.stack([
            pred_camera[:, 1], pred_camera[:, 2], 2 * self.focal_length /
            (self.options.img_res * pred_camera[:, 0] + 1e-9)
        ],
                                 dim=-1)
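        # The depth term follows from equating weak perspective (scale s over a
        # crop of size img_res) with full perspective: s ~ 2 * focal_length /
        # (img_res * tz), hence tz = 2 * focal_length / (img_res * s); the 1e-9
        # only guards against division by zero.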

        camera_center = torch.zeros(batch_size, 2, device=self.device)
        pred_keypoints_2d = perspective_projection(
            pred_joints,
            rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(
                batch_size, -1, -1),
            translation=pred_cam_t,
            focal_length=self.focal_length,
            camera_center=camera_center)
        # Normalize keypoints to [-1,1]
        pred_keypoints_2d = pred_keypoints_2d / (self.options.img_res / 2.)

        if self.options.run_smplify:

            # Convert predicted rotation matrices to axis-angle
            pred_rotmat_hom = torch.cat([
                pred_rotmat.detach().view(-1, 3, 3),
                torch.tensor(
                    [0, 0, 1], dtype=torch.float32, device=self.device).view(
                        1, 3, 1).expand(batch_size * 24, -1, -1)
            ],
                                        dim=-1)
            pred_pose = rotation_matrix_to_angle_axis(
                pred_rotmat_hom).contiguous().view(batch_size, -1)
            # tgm.rotation_matrix_to_angle_axis returns NaN for 0 rotation, so manually hack it
            pred_pose[torch.isnan(pred_pose)] = 0.0

            # Run SMPLify optimization starting from the network prediction
            new_opt_vertices, new_opt_joints,\
            new_opt_pose, new_opt_betas,\
            new_opt_cam_t, new_opt_joint_loss = self.smplify(
                                        pred_pose.detach(), pred_betas.detach(),
                                        pred_cam_t.detach(),
                                        0.5 * self.options.img_res * torch.ones(batch_size, 2, device=self.device),
                                        gt_keypoints_2d_orig)
            new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1)

            # Will update the dictionary for the examples where the new loss is less than the current one
            update = (new_opt_joint_loss < opt_joint_loss)

            opt_joint_loss[update] = new_opt_joint_loss[update]
            opt_vertices[update, :] = new_opt_vertices[update, :]
            opt_joints[update, :] = new_opt_joints[update, :]
            opt_pose[update, :] = new_opt_pose[update, :]
            opt_betas[update, :] = new_opt_betas[update, :]
            opt_cam_t[update, :] = new_opt_cam_t[update, :]

            self.fits_dict[(dataset_name, indices.cpu(), rot_angle.cpu(),
                            is_flipped.cpu(),
                            update.cpu())] = (opt_pose.cpu(), opt_betas.cpu())

        else:
            update = torch.zeros(batch_size, device=self.device).byte()

        # Replace extreme betas with zero betas
        opt_betas[(opt_betas.abs() > 3).any(dim=-1)] = 0.

        # Replace the optimized parameters with the ground truth parameters, if available
        opt_vertices[has_smpl, :, :] = gt_vertices[has_smpl, :, :]
        opt_cam_t[has_smpl, :] = gt_cam_t[has_smpl, :]
        opt_joints[has_smpl, :, :] = gt_model_joints[has_smpl, :, :]
        opt_pose[has_smpl, :] = gt_pose[has_smpl, :]
        opt_betas[has_smpl, :] = gt_betas[has_smpl, :]

        # Assess whether a fit is valid by comparing the joint loss with the threshold
        valid_fit = (opt_joint_loss < self.options.smplify_threshold).to(
            self.device)
        # Add the examples with GT parameters to the list of valid fits
        # print(valid_fit.dtype)
        valid_fit = valid_fit.to(torch.uint8)
        valid_fit = valid_fit | has_smpl

        opt_keypoints_2d = perspective_projection(
            opt_joints,
            rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(
                batch_size, -1, -1),
            translation=opt_cam_t,
            focal_length=self.focal_length,
            camera_center=camera_center)

        opt_keypoints_2d = opt_keypoints_2d / (self.options.img_res / 2.)

        # Compute loss on SMPL parameters
        loss_regr_pose, loss_regr_betas = self.smpl_losses(
            pred_rotmat, pred_betas, opt_pose, opt_betas, valid_fit)

        # Compute 2D reprojection loss for the keypoints
        loss_keypoints = self.keypoint_loss(pred_keypoints_2d, gt_keypoints_2d,
                                            self.options.openpose_train_weight,
                                            self.options.gt_train_weight)

        # Compute 3D keypoint loss
        loss_keypoints_3d = self.keypoint_3d_loss(pred_joints, gt_joints,
                                                  has_pose_3d)

        # Per-vertex loss for the shape
        loss_shape = self.shape_loss(pred_vertices, opt_vertices, valid_fit)

        # Compute total loss
        # The last component is a loss that forces the network to predict positive depth values
        loss = self.options.shape_loss_weight * loss_shape +\
               self.options.keypoint_loss_weight * loss_keypoints +\
               self.options.keypoint_loss_weight * loss_keypoints_3d +\
               loss_regr_pose + self.options.beta_loss_weight * loss_regr_betas +\
               ((torch.exp(-pred_camera[:,0]*10)) ** 2 ).mean()
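        # The exp(-10 * s) ** 2 term penalizes non-positive camera scales: it is
        # near zero for s > 0 and grows rapidly for s <= 0, which keeps the
        # derived depth tz = 2 * f / (img_res * s) in front of the camera.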
        loss *= 60

        # Do backprop
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Pack output arguments for tensorboard logging
        output = {
            'pred_vertices': pred_vertices.detach(),
            'opt_vertices': opt_vertices,
            'pred_cam_t': pred_cam_t.detach(),
            'opt_cam_t': opt_cam_t
        }
        losses = {
            'loss': loss.detach().item(),
            'loss_keypoints': loss_keypoints.detach().item(),
            'loss_keypoints_3d': loss_keypoints_3d.detach().item(),
            'loss_regr_pose': loss_regr_pose.detach().item(),
            'loss_regr_betas': loss_regr_betas.detach().item(),
            'loss_shape': loss_shape.detach().item()
        }

        return output, losses

    def train_summaries(self, input_batch, output, losses):
        # Save the fits dictionary every time summaries are generated
        self.finalize()
        images = input_batch['img']
        images = images * torch.tensor(
            [0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
        images = images + torch.tensor(
            [0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)

        pred_vertices = output['pred_vertices']
        assert pred_vertices.shape == (self.options.batch_size, 6890, 3)
        opt_vertices = output['opt_vertices']
        pred_cam_t = output['pred_cam_t']
        opt_cam_t = output['opt_cam_t']
        images_pred = self.renderer.visualize_tb(pred_vertices, pred_cam_t,
                                                 images)
        images_opt = self.renderer.visualize_tb(opt_vertices, opt_cam_t,
                                                images)
        self.summary_writer.add_image('pred_shape', images_pred,
                                      self.step_count)
        self.summary_writer.add_image('opt_shape', images_opt, self.step_count)
        for loss_name, val in losses.items():
            self.summary_writer.add_scalar(loss_name, val, self.step_count)
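For orientation, below is a minimal sketch of how a trainer like the one above might be driven. The outer loop, the train_data_loader and summary_steps names, and the device handling are illustrative assumptions; BaseTrainer normally owns this logic itself.

import torch

# Hypothetical driver loop; BaseTrainer maintains self.step_count internally,
# so this only sketches the call pattern of train_step / train_summaries.
for step, batch in enumerate(train_data_loader):
    # Move every tensor in the batch onto the trainer's device.
    batch = {k: v.to(trainer.device) if isinstance(v, torch.Tensor) else v
             for k, v in batch.items()}
    output, losses = trainer.train_step(batch)
    if step % summary_steps == 0:
        trainer.train_summaries(batch, output, losses)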
Ejemplo n.º 21
0
    def init_fn(self):
        # create training dataset
        self.train_ds = create_dataset(self.options.dataset,
                                       self.options,
                                       use_IUV=True)
        self.dp_res = int(self.options.img_res // (2**self.options.warp_level))

        self.CNet = DPNet(warp_lv=self.options.warp_level,
                          norm_type=self.options.norm_type).to(self.device)

        self.LNet = get_LNet(self.options).to(self.device)
        self.smpl = SMPL().to(self.device)
        self.female_smpl = SMPL(cfg.FEMALE_SMPL_FILE).to(self.device)
        self.male_smpl = SMPL(cfg.MALE_SMPL_FILE).to(self.device)

        uv_res = self.options.uv_res
        self.uv_type = self.options.uv_type
        self.sampler = Index_UV_Generator(UV_height=uv_res,
                                          UV_width=-1,
                                          uv_type=self.uv_type).to(self.device)

        weight_file = 'data/weight_p24_h{:04d}_w{:04d}_{}.npy'.format(
            uv_res, uv_res, self.uv_type)
        if not os.path.exists(weight_file):
            cal_uv_weight(self.sampler, weight_file)

        uv_weight = torch.from_numpy(np.load(weight_file)).to(
            self.device).float()
        uv_weight = uv_weight * self.sampler.mask.to(uv_weight.device).float()
        uv_weight = uv_weight / uv_weight.mean()
        self.uv_weight = uv_weight[None, :, :, None]
        self.tv_factor = (uv_res - 1) * (uv_res - 1)

        # Setup an optimizer
        if self.options.stage == 'dp':
            self.optimizer = torch.optim.Adam(
                params=list(self.CNet.parameters()),
                lr=self.options.lr,
                betas=(self.options.adam_beta1, 0.999),
                weight_decay=self.options.wd)
            self.models_dict = {'CNet': self.CNet}
            self.optimizers_dict = {'optimizer': self.optimizer}

        else:
            self.optimizer = torch.optim.Adam(
                params=list(self.LNet.parameters()) +
                list(self.CNet.parameters()),
                lr=self.options.lr,
                betas=(self.options.adam_beta1, 0.999),
                weight_decay=self.options.wd)
            self.models_dict = {'CNet': self.CNet, 'LNet': self.LNet}
            self.optimizers_dict = {'optimizer': self.optimizer}

        # Create loss functions
        self.criterion_shape = nn.L1Loss().to(self.device)
        self.criterion_uv = nn.L1Loss().to(self.device)
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        self.criterion_keypoints_3d = nn.L1Loss(reduction='none').to(
            self.device)
        self.criterion_regr = nn.MSELoss().to(self.device)

        # LSP indices from full list of keypoints
        self.to_lsp = list(range(14))
        self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

        # Optionally start training from a pretrained checkpoint
        # Note that this is different from resuming training
        # For the latter use --resume
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)
class PhotometricFitting(object):
    def __init__(self, device='cuda'):
        self.batch_size = cfg.batch_size
        self.image_size = cfg.image_size
        self.cropped_size = cfg.cropped_size
        self.config = cfg
        self.device = device
        self.flame = FLAME(self.config).to(self.device)
        self.flametex = FLAMETex(self.config).to(self.device)

        self._setup_renderer()

    def _setup_renderer(self):
        self.render = Renderer(self.image_size,
                               obj_filename=cfg.mesh_file).to(self.device)

    def optimize(self, images, landmarks, image_masks, savefolder=None):
        bz = images.shape[0]
        shape = nn.Parameter(
            torch.zeros(bz, cfg.shape_params).float().to(self.device))
        tex = nn.Parameter(
            torch.zeros(bz, cfg.tex_params).float().to(self.device))
        exp = nn.Parameter(
            torch.zeros(bz, cfg.expression_params).float().to(self.device))
        pose = nn.Parameter(
            torch.zeros(bz, cfg.pose_params).float().to(self.device))
        cam = torch.zeros(bz, cfg.camera_params)
        cam[:, 0] = 5.
        cam = nn.Parameter(cam.float().to(self.device))
        lights = nn.Parameter(torch.zeros(bz, 9, 3).float().to(self.device))
        e_opt = torch.optim.Adam([shape, exp, pose, cam, tex, lights],
                                 lr=cfg.e_lr,
                                 weight_decay=cfg.e_wd)
        e_opt_rigid = torch.optim.Adam([pose, cam],
                                       lr=cfg.e_lr,
                                       weight_decay=cfg.e_wd)

        gt_landmark = landmarks

        # Rigid fitting of pose and camera using the 51 static (non-contour) face landmarks;
        # the contour landmarks are excluded because their trajectories are not differentiable.
        for k in range(200):
            losses = {}
            vertices, landmarks2d, landmarks3d = self.flame(
                shape_params=shape, expression_params=exp, pose_params=pose)
            trans_vertices = util.batch_orth_proj(vertices, cam)
            trans_vertices[..., 1:] = -trans_vertices[..., 1:]
            landmarks2d = util.batch_orth_proj(landmarks2d, cam)
            landmarks2d[..., 1:] = -landmarks2d[..., 1:]
            landmarks3d = util.batch_orth_proj(landmarks3d, cam)
            landmarks3d[..., 1:] = -landmarks3d[..., 1:]

            losses['landmark'] = util.l2_distance(
                landmarks2d[:, 17:, :2], gt_landmark[:, 17:, :2]) * cfg.w_lmks

            all_loss = 0.
            for key in losses.keys():
                all_loss = all_loss + losses[key]
            losses['all_loss'] = all_loss
            e_opt_rigid.zero_grad()
            all_loss.backward()
            e_opt_rigid.step()

            loss_info = '----iter: {}, time: {}\n'.format(
                k,
                datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))
            for key in losses.keys():
                loss_info = loss_info + '{}: {}, '.format(
                    key, float(losses[key]))
            if k % 10 == 0:
                print(loss_info)

            if k % 10 == 0:
                grids = {}
                visind = range(bz)  # [0]
                grids['images'] = torchvision.utils.make_grid(
                    images[visind]).detach().cpu()
                grids['landmarks_gt'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind],
                                              landmarks[visind]))
                grids['landmarks2d'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind],
                                              landmarks2d[visind]))
                grids['landmarks3d'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind],
                                              landmarks3d[visind]))

                grid = torch.cat(list(grids.values()), 1)
                grid_image = (grid.numpy().transpose(1, 2, 0).copy() *
                              255)[:, :, [2, 1, 0]]
                grid_image = np.minimum(np.maximum(grid_image, 0),
                                        255).astype(np.uint8)
                cv2.imwrite('{}/{}.jpg'.format(savefolder, k), grid_image)

        # Non-rigid fitting of all parameters using all 68 face landmarks, the photometric loss and regularization terms.
        for k in range(200, 1000):
            losses = {}
            vertices, landmarks2d, landmarks3d = self.flame(
                shape_params=shape, expression_params=exp, pose_params=pose)
            trans_vertices = util.batch_orth_proj(vertices, cam)
            trans_vertices[..., 1:] = -trans_vertices[..., 1:]
            landmarks2d = util.batch_orth_proj(landmarks2d, cam)
            landmarks2d[..., 1:] = -landmarks2d[..., 1:]
            landmarks3d = util.batch_orth_proj(landmarks3d, cam)
            landmarks3d[..., 1:] = -landmarks3d[..., 1:]

            losses['landmark'] = util.l2_distance(
                landmarks2d[:, :, :2], gt_landmark[:, :, :2]) * cfg.w_lmks
            losses['shape_reg'] = (torch.sum(shape**2) /
                                   2) * cfg.w_shape_reg  # *1e-4
            losses['expression_reg'] = (torch.sum(exp**2) /
                                        2) * cfg.w_expr_reg  # *1e-4
            losses['pose_reg'] = (torch.sum(pose**2) / 2) * cfg.w_pose_reg

            ## render
            albedos = self.flametex(tex) / 255.
            ops = self.render(vertices, trans_vertices, albedos, lights)
            predicted_images = ops['images']
            losses['photometric_texture'] = (
                image_masks *
                (ops['images'] - images).abs()).mean() * cfg.w_pho

            all_loss = 0.
            for key in losses.keys():
                all_loss = all_loss + losses[key]
            losses['all_loss'] = all_loss
            e_opt.zero_grad()
            all_loss.backward()
            e_opt.step()

            loss_info = '----iter: {}, time: {}\n'.format(
                k,
                datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))
            for key in losses.keys():
                loss_info = loss_info + '{}: {}, '.format(
                    key, float(losses[key]))

            if k % 10 == 0:
                print(loss_info)

            # visualize
            if k % 10 == 0:
                grids = {}
                visind = range(bz)  # [0]
                grids['images'] = torchvision.utils.make_grid(
                    images[visind]).detach().cpu()
                grids['landmarks_gt'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind],
                                              landmarks[visind]))
                grids['landmarks2d'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind],
                                              landmarks2d[visind]))
                grids['landmarks3d'] = torchvision.utils.make_grid(
                    util.tensor_vis_landmarks(images[visind],
                                              landmarks3d[visind]))
                grids['albedoimage'] = torchvision.utils.make_grid(
                    (ops['albedo_images'])[visind].detach().cpu())
                grids['render'] = torchvision.utils.make_grid(
                    predicted_images[visind].detach().float().cpu())
                shape_images = self.render.render_shape(
                    vertices, trans_vertices, images)
                grids['shape'] = torchvision.utils.make_grid(
                    F.interpolate(shape_images[visind],
                                  [224, 224])).detach().float().cpu()

                # grids['tex'] = torchvision.utils.make_grid(F.interpolate(albedos[visind], [224, 224])).detach().cpu()
                grid = torch.cat(list(grids.values()), 1)
                grid_image = (grid.numpy().transpose(1, 2, 0).copy() *
                              255)[:, :, [2, 1, 0]]
                grid_image = np.minimum(np.maximum(grid_image, 0),
                                        255).astype(np.uint8)

                cv2.imwrite('{}/{}.jpg'.format(savefolder, k), grid_image)

        single_params = {
            'shape': shape.detach().cpu().numpy(),
            'exp': exp.detach().cpu().numpy(),
            'pose': pose.detach().cpu().numpy(),
            'cam': cam.detach().cpu().numpy(),
            'verts': trans_vertices.detach().cpu().numpy(),
            'albedos': albedos.detach().cpu().numpy(),
            'tex': tex.detach().cpu().numpy(),
            'lit': lights.detach().cpu().numpy()
        }
        return single_params

    def run(self, imagepath, landmarkpath):
        # The implementation can in principle optimize over a batch of images (batch_size > 1);
        # here we show the example of fitting a single image.
        images = []
        landmarks = []
        image_masks = []

        image_name = os.path.basename(imagepath)[:-4]
        savefile = os.path.sep.join([cfg.save_folder, image_name + '.npy'])

        # Photometric optimization is sensitive to occlusions from hair or glasses,
        # so we use a face segmentation network to obtain a skin-region mask and ignore occluded pixels.
        image_mask_folder = './FFHQ_seg/'
        image_mask_path = os.path.sep.join(
            [image_mask_folder, image_name + '.npy'])

        image = cv2.resize(cv2.imread(imagepath),
                           (cfg.cropped_size, cfg.cropped_size)).astype(
                               np.float32) / 255.
        image = image[:, :, [2, 1, 0]].transpose(2, 0, 1)
        images.append(torch.from_numpy(image[None, :, :, :]).to(self.device))

        image_mask = np.load(image_mask_path, allow_pickle=True)
        image_mask = image_mask[..., None].astype('float32')
        image_mask = image_mask.transpose(2, 0, 1)
        image_mask_bn = np.zeros_like(image_mask)
        image_mask_bn[np.where(image_mask != 0)] = 1.
        image_masks.append(
            torch.from_numpy(image_mask_bn[None, :, :, :]).to(self.device))

        landmark = np.load(landmarkpath).astype(np.float32)
        landmark[:, 0] = landmark[:, 0] / float(image.shape[2]) * 2 - 1
        landmark[:, 1] = landmark[:, 1] / float(image.shape[1]) * 2 - 1
        landmarks.append(
            torch.from_numpy(landmark)[None, :, :].float().to(self.device))

        images = torch.cat(images, dim=0)
        images = F.interpolate(images, [cfg.image_size, cfg.image_size])
        image_masks = torch.cat(image_masks, dim=0)
        image_masks = F.interpolate(image_masks,
                                    [cfg.image_size, cfg.image_size])

        landmarks = torch.cat(landmarks, dim=0)
        savefolder = os.path.sep.join([cfg.save_folder, image_name])

        util.check_mkdir(savefolder)
        # optimize
        single_params = self.optimize(images, landmarks, image_masks,
                                      savefolder)
        self.render.save_obj(filename=savefile[:-4] + '.obj',
                             vertices=torch.from_numpy(
                                 single_params['verts'][0]).to(self.device),
                             textures=torch.from_numpy(
                                 single_params['albedos'][0]).to(self.device))
        np.save(savefile, single_params)
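A minimal usage sketch for the class above, assuming FFHQ-style inputs (an image, a 68-landmark .npy file, and a matching segmentation mask under ./FFHQ_seg/); the paths are placeholders.

# Hypothetical invocation of PhotometricFitting; paths are placeholders.
if __name__ == '__main__':
    fitting = PhotometricFitting(device='cuda')
    # run() loads the image, its landmarks and its ./FFHQ_seg/ mask, optimizes
    # shape, expression, pose, camera, texture and lighting, and saves the
    # results under cfg.save_folder.
    fitting.run('./FFHQ/00000.png', './FFHQ/00000.npy')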
Ejemplo n.º 23
0
import os
import os.path as osp
import cv2
import numpy as np
import pickle
from utils.renderer import Renderer
from smpl_torch import SMPLNP
from global_var import ROOT


if __name__ == '__main__':
    garment_class = 't-shirt'
    gender = 'female'
    img_size = 512
    renderer = Renderer(img_size)
    smpl = SMPLNP(gender=gender, cuda=False)

    pose_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'pose')
    shape_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'shape')
    ss_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'style_shape')
    pose_vis_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'pose_vis')
    ss_vis_dir = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'style_shape_vis')
    pivots_path = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'pivots.txt')
    avail_path = osp.join(ROOT, '{}_{}'.format(garment_class, gender), 'avail.txt')
    os.makedirs(pose_vis_dir, exist_ok=True)
    os.makedirs(ss_vis_dir, exist_ok=True)

    with open(os.path.join(ROOT, 'garment_class_info.pkl'), 'rb') as f:
        class_info = pickle.load(f, encoding='latin-1')
    body_f = smpl.base.faces
    garment_f = class_info[garment_class]['f']
Ejemplo n.º 24
0
from utils.shapes import Sphere, Cuboid, Tetrahedron
from utils.renderer import Renderer, Cam, Lit, Tri
import cv2

rend = Renderer()

sp = Sphere((0, 0, 0), 25)
shapes = sp.render()
cuboid = Cuboid((0, 0, -100))
cuboid.scale(25, axis=0)
cuboid.scale(50, axis=1)
cuboid.scale(75, axis=2)
cuboid.rotate(90, 90)
shapes.extend(cuboid.render())
tetrahedron = Tetrahedron((100, 0, 0))
tetrahedron.scale(25, axis=0)
tetrahedron.scale(50, axis=1)
tetrahedron.scale(20, axis=2)
tetrahedron.rotate(45, 180)
shapes.extend(tetrahedron.render())

background_prims = []
background_prims.append(Tri([(-1000.00,-40.00,1000.00), (1000.00,-40.00, 1000.00), (-1000.00,-40.00,-1000.00)]))
background_prims.append(Tri([(-1000.00,-40.00,-1000.00), (1000.00,-40.00, 1000.00), (1000.00,-40.00,-1000.00)]))


light = Lit((125,300,35),79000)
camera = [Cam((200,222,83),( -.5,-.7,-.5), (640,480))]
shadow, noshadow = rend.render(camera, light, shapes, background_prims)
cv2.imwrite("shadow.png", shadow)
cv2.imwrite("noshadow.png", noshadow)
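Assuming the Cam, Lit and Renderer interfaces behave as the script above suggests, the same scene can be rendered from a second viewpoint; the coordinates below are arbitrary.

# Hypothetical second viewpoint reusing the objects defined above.
camera2 = [Cam((-200, 222, 83), (.5, -.7, -.5), (640, 480))]
shadow2, noshadow2 = rend.render(camera2, light, shapes, background_prims)
cv2.imwrite("shadow_view2.png", shadow2)
cv2.imwrite("noshadow_view2.png", noshadow2)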
Ejemplo n.º 25
0
    plt.savefig(args.img[:-4]+'_CMRpreds'+'.png', dpi=400)

if __name__ == '__main__':
    args = parser.parse_args()
    
    # Load model
    mesh = Mesh(device=DEVICE)
    # Our pretrained networks have 5 residual blocks with 256 channels. 
    # You might want to change this if you use a different architecture.
    model = CMR(mesh, 5, 256, pretrained_checkpoint=args.checkpoint)
    if DEVICE == torch.device("cuda"):
        model.cuda()
    model.eval()

    # Setup renderer for visualization
    renderer = Renderer()

    # Preprocess input image and generate predictions
    img, norm_img = process_image(args.img, input_res=cfg.INPUT_RES)
    if DEVICE == torch.device("cuda"):
        norm_img = norm_img.cuda()
    with torch.no_grad():
        pred_vertices, pred_vertices_smpl, pred_camera, _, _ = model(norm_img)
        
    # Calculate camera parameters for rendering
    camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*cfg.FOCAL_LENGTH/(cfg.INPUT_RES * pred_camera[:,0] +1e-9)],dim=-1)
    camera_translation = camera_translation[0].cpu().numpy()
    pred_vertices = pred_vertices[0].cpu().numpy()
    pred_vertices_smpl = pred_vertices_smpl[0].cpu().numpy()
    img = img.permute(1,2,0).cpu().numpy()
    
Ejemplo n.º 26
0
class Scene:

    def __init__(self, light_variability=(20,8), gridlines_on=None, gridlines_width=None, gridlines_spacing=None):
        if gridlines_on or gridlines_width or gridlines_spacing:
            assert not (gridlines_on is None\
                or gridlines_width is None\
                or gridlines_spacing is None),\
                "All gridlines variables must be set if any are"

        self.rend = Renderer()

        self.shapes = []
        self.grid_shapes = []

        self.center = np.array((0, 140, 300))

        self.light_variability = light_variability

        self.background_prims = []
        background_lower_bound = -1e3
        background_upper_bound = 1e3
        wall_bound = 1e3
        self.background_prims.append(
            Tri([(-wall_bound, 0, wall_bound),
                (wall_bound, 0, wall_bound),
                (-wall_bound, 0, -wall_bound)]))
        self.background_prims.append(
            Tri([(-wall_bound, 0, -wall_bound),
                (wall_bound, 0, wall_bound),
                (wall_bound, 0, -wall_bound)]))
        self.background_prims.append(
            Tri([(-wall_bound, -50, wall_bound),
                (0, wall_bound, wall_bound),
                (wall_bound, -50, wall_bound)]))

        if gridlines_on:
            for i in range(int((background_upper_bound - background_lower_bound) / (gridlines_width + gridlines_spacing))):
                offset = i * (gridlines_width + gridlines_spacing)
                self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_lower_bound),
                                            (background_lower_bound + offset, 0.01, background_upper_bound),
                                            (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)]))
                self.grid_shapes.append(Tri([(background_lower_bound + offset, 0.01, background_upper_bound),
                                            (background_lower_bound + gridlines_width + offset, 0.01, background_upper_bound),
                                            (background_lower_bound + gridlines_width + offset, 0.01, background_lower_bound)]))
                self.grid_shapes.append(Tri([(background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset),
                                             (background_upper_bound, 0.01, background_lower_bound + offset),
                                             (background_lower_bound, 0.01, background_lower_bound + offset)]))
                self.grid_shapes.append(Tri([(background_upper_bound, 0.01, background_lower_bound + offset),
                                             (background_lower_bound, 0.01, background_lower_bound + gridlines_width + offset),
                                            (background_upper_bound, 0.01, background_lower_bound + gridlines_width + offset)]))

        self.default_light = np.array((400, 300, -800))
        self.default_intensity = 1000000
        self.camera = Cam((0, 140, 300), (128, 128))


    def calc_center(self):
        return mean([shape.center for shape in self.shapes])

    def add_object(self, i=-1):
        if i<0:
            i = randint(0, 7)
        shape = [Sphere(self.center, 0.5),
                 Tetrahedron(self.center),
                 Cuboid(self.center),
                 Cylinder(self.center, 50),
                 Pyramid(self.center),
                 Cone(self.center, 50),
                 Torus(self.center, 0.5, 50, 0.15),
                 HollowCuboid(self.center, 0.15)][i]
        shape.scale(35)
        self.shapes.append(shape)

    def mutate_object(self, shape):
        shape.scale(randint(25, 40))
        self.__rotate_object(shape)
        self.__translate_object(shape)

    def mutate_all_objects(self):
        for shape in self.shapes:
            self.__scale_object(shape)
            self.__rotate_object(shape)
            self.__translate_object(shape)

    def crossover(self, scene):
        offspring = Scene()
        offspring.shapes = self.shapes + scene.shapes
        shuffle(offspring.shapes)
        offspring.shapes = offspring.shapes[:len(offspring.shapes)//2]
        return offspring

    def mutate(self):
        if randint(0,1) == 0:
            self.add_object()
        else:
            shape = self.shapes[randint(0, len(self.shapes) - 1)]
            mutation = [self.__scale_object, self.__translate_object, self.__rotate_object][randint(0,2)]
            mutation(shape)


    def new_light(self, theta = 60, phi=8):
        d = norm(self.default_light - self.center)
        x_trans = d * math.sin(math.radians(theta))
        z_trans = d * math.cos(math.radians(theta))
        y_trans = d * math.sin(math.radians(phi))
        translation = np.array((x_trans, y_trans, -z_trans))
        return Lit(self.center + translation, self.default_intensity)


    def __scale_object(self, shape):
        for i in range(3):
            shape.scale(uniform(0.8, 1.2), axis=i)

    def __translate_object(self, shape):
        shape.translate((randint(-50, 50), 0, randint(-50, 50)))

    def ground_mesh(self):
        for shape in self.shapes:
            lowest_y = shape.lowest_y()
            shape.translate((0, -lowest_y, 0))


    def __rotate_object(self, shape):
        shape.rotate(randint(0, 359), randint(0, 359), randint(0, 359))

    def refocus_camera(self):
        self.camera.location = self.calc_center()

    def render(self):
        surface_prims = []
        light = self.new_light(*self.light_variability)
        for shape in self.shapes:
            surface_prims += shape.render()
        views = [self.camera.view_from(-30, 0, 200)]
        res_x, res_y = self.camera.resolution
        return self.rend.render(views, light, surface_prims, self.background_prims, res_x, res_y, self.grid_shapes, grid_color=(0.7,0.7,0.7))
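A brief usage sketch for the Scene class above; the number of objects and the call order are illustrative assumptions.

# Hypothetical usage of the Scene class defined above.
scene = Scene(light_variability=(20, 8))
for _ in range(3):
    scene.add_object()          # add a random primitive at the scene center
scene.mutate_all_objects()      # random per-axis scale, rotation, translation
scene.ground_mesh()             # drop every shape onto the ground plane
scene.refocus_camera()          # aim the camera at the new scene center
result = scene.render()         # whatever Renderer.render returns in this repo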
Ejemplo n.º 27
0
    # Load model
    if args.config is None:
        tmp = args.checkpoint.split('/')[:-2]
        tmp.append('config.json')
        args.config = '/' + join(*tmp)

    with open(args.config, 'r') as f:
        options = json.load(f)
        options = namedtuple('options', options.keys())(**options)

    model = DMR(options, args.checkpoint)
    model.eval()

    # Setup renderer for visualization
    _, faces = read_obj('data/reference_mesh.obj')
    renderer = Renderer(faces=np.array(faces) - 1)

    # Preprocess input image and generate predictions
    img, norm_img = process_image(args.img, args.bbox, args.openpose, input_res=cfg.INPUT_RES)
    with torch.no_grad():
        out_dict = model(norm_img.to(model.device))
        pred_vertices = out_dict['pred_vertices']
        pred_camera = out_dict['camera']

    # Calculate camera parameters for rendering
    camera_translation = torch.stack([pred_camera[:,1], pred_camera[:,2], 2*cfg.FOCAL_LENGTH/(cfg.INPUT_RES * pred_camera[:,0] +1e-9)],dim=-1)
    camera_translation = camera_translation[0].cpu().numpy()
    pred_vertices = pred_vertices[0].cpu().numpy()
    img = img.permute(1,2,0).cpu().numpy()

    # Render non-parametric shape
Ejemplo n.º 28
0
class Trainer_li(BaseTrainer):
    def init_fn(self):
        #self.dataset = 'h36m'
        #self.train_ds = BaseDataset(self.options, self.dataset)   # training dataset
        self.train_ds = MixedDataset(self.options,
                                     ignore_3d=self.options.ignore_3d,
                                     is_train=True)
        self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(
            self.device)  # feature extraction model
        self.optimizer = torch.optim.Adam(
            params=self.model.parameters(),
            #lr=5e-5,
            lr=self.options.lr,
            weight_decay=0)
        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=16,
                         create_transl=False).to(self.device)
        # per vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # keypoints loss including 2D and 3D
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # SMPL parameters loss if we have
        self.criterion_regr = nn.MSELoss().to(self.device)

        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH
        # initialize SMPLify
        self.smplify = SMPLify(step_size=1e-2,
                               batch_size=16,
                               num_iters=100,
                               focal_length=self.focal_length)
        print(self.options.pretrained_checkpoint)
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)
        #load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)
        # create renderer
        self.renderer = Renderer(focal_length=self.focal_length,
                                 img_res=224,
                                 faces=self.smpl.faces)

    def finalize(self):
        self.fits_dict.save()

    def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d,
                      openpose_weight, gt_weight):
        """Compute 2D reprojection loss on the keypoints.
        The loss is weighted by the confidence.
        """
        conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
        conf[:, :25] *= openpose_weight
        conf[:, 25:] *= gt_weight
        loss = (conf * self.criterion_keypoints(
            pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean()
        return loss

    def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d,
                         has_pose_3d):
        pred_keypoints_3d = pred_keypoints_3d[:, 25:, :]
        conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
        gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone()
        gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1]
        conf = conf[has_pose_3d == 1]
        pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1]
        if len(gt_keypoints_3d) > 0:
            gt_pelvis = (gt_keypoints_3d[:, 2, :] +
                         gt_keypoints_3d[:, 3, :]) / 2
            gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :]
            pred_pelvis = (pred_keypoints_3d[:, 2, :] +
                           pred_keypoints_3d[:, 3, :]) / 2
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :]
            return (conf * self.criterion_keypoints(pred_keypoints_3d,
                                                    gt_keypoints_3d)).mean()
        else:
            return torch.FloatTensor(1).fill_(0.).to(self.device)

    def shape_loss(self, pred_vertices, gt_vertices, has_smpl):
        """Compute per-vertex loss on the shape for the examples that SMPL annotations are available."""
        pred_vertices_with_shape = pred_vertices[has_smpl == 1]
        gt_vertices_with_shape = gt_vertices[has_smpl == 1]
        if len(gt_vertices_with_shape) > 0:
            return self.criterion_shape(pred_vertices_with_shape,
                                        gt_vertices_with_shape)
        else:
            return torch.FloatTensor(1).fill_(0.).to(self.device)

    def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas,
                    has_smpl):
        pred_rotmat_valid = pred_rotmat[has_smpl == 1]
        gt_rotmat_valid = batch_rodrigues(gt_pose.view(-1, 3)).view(
            -1, 24, 3, 3)[has_smpl == 1]
        #print(pred_rotmat_valid.size(),gt_rotmat_valid.size())
        #input()
        pred_betas_valid = pred_betas[has_smpl == 1]
        gt_betas_valid = gt_betas[has_smpl == 1]
        if len(pred_rotmat_valid) > 0:
            loss_regr_pose = self.criterion_regr(pred_rotmat_valid,
                                                 gt_rotmat_valid)
            loss_regr_betas = self.criterion_regr(pred_betas_valid,
                                                  gt_betas_valid)
        else:
            loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device)
            loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device)
        return loss_regr_pose, loss_regr_betas

    def train_step(self, input_batch):
        self.model.train()
        # get data from batch
        has_smpl = input_batch['has_smpl'].bool()
        has_pose_3d = input_batch['has_pose_3d'].bool()
        gt_pose1 = input_batch['pose']  # SMPL pose parameters
        gt_betas1 = input_batch['betas']  # SMPL beta parameters
        dataset_name = input_batch['dataset_name']
        indices = input_batch[
            'sample_index']  # index of example inside its dataset
        is_flipped = input_batch[
            'is_flipped']  # flag that indicates whether image was flipped during data augmentation
        rot_angle = input_batch[
            'rot_angle']  # rotation angle used for data augmentation
        #print(rot_angle)
        # Get GT vertices and model joints
        # Note that gt_model_joints is different from gt_joints as it comes from SMPL
        gt_betas = torch.cat((gt_betas1, gt_betas1, gt_betas1, gt_betas1), 0)
        gt_pose = torch.cat((gt_pose1, gt_pose1, gt_pose1, gt_pose1), 0)
        gt_out = self.smpl(betas=gt_betas,
                           body_pose=gt_pose[:, 3:],
                           global_orient=gt_pose[:, :3])
        gt_model_joints = gt_out.joints
        gt_vertices = gt_out.vertices
        # Get current best fits from the dictionary
        opt_pose1, opt_betas1 = self.fits_dict[(dataset_name, indices.cpu(),
                                                rot_angle.cpu(),
                                                is_flipped.cpu())]
        opt_pose = torch.cat(
            (opt_pose1.to(self.device), opt_pose1.to(self.device),
             opt_pose1.to(self.device), opt_pose1.to(self.device)), 0)
        #print(opt_pose.device)
        #opt_betas = opt_betas.to(self.device)
        opt_betas = torch.cat(
            (opt_betas1.to(self.device), opt_betas1.to(self.device),
             opt_betas1.to(self.device), opt_betas1.to(self.device)), 0)
        opt_output = self.smpl(betas=opt_betas,
                               body_pose=opt_pose[:, 3:],
                               global_orient=opt_pose[:, :3])
        opt_vertices = opt_output.vertices
        opt_joints = opt_output.joints
        # images
        images = torch.cat((input_batch['img_0'], input_batch['img_1'],
                            input_batch['img_2'], input_batch['img_3']), 0)
        batch_size = input_batch['img_0'].shape[0]
        #input()
        # Output of CNN
        pred_rotmat, pred_betas, pred_camera = self.model(images)
        pred_output = self.smpl(betas=pred_betas,
                                body_pose=pred_rotmat[:, 1:],
                                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                pose2rot=False)
        pred_vertices = pred_output.vertices
        pred_joints = pred_output.joints
        pred_cam_t = torch.stack([
            pred_camera[:, 1], pred_camera[:, 2], 2 * self.focal_length /
            (self.options.img_res * pred_camera[:, 0] + 1e-9)
        ],
                                 dim=-1)
        camera_center = torch.zeros(batch_size * 4, 2, device=self.device)
        pred_keypoints_2d = perspective_projection(
            pred_joints,
            rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(
                batch_size * 4, -1, -1),
            translation=pred_cam_t,
            focal_length=self.focal_length,
            camera_center=camera_center)
        pred_keypoints_2d = pred_keypoints_2d / (self.options.img_res / 2.)
        # 2d joint points
        gt_keypoints_2d = torch.cat(
            (input_batch['keypoints_0'], input_batch['keypoints_1'],
             input_batch['keypoints_2'], input_batch['keypoints_3']), 0)
        gt_keypoints_2d_orig = gt_keypoints_2d.clone()
        gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * (
            gt_keypoints_2d_orig[:, :, :-1] + 1)
        gt_cam_t = estimate_translation(gt_model_joints,
                                        gt_keypoints_2d_orig,
                                        focal_length=self.focal_length,
                                        img_size=self.options.img_res)
        opt_cam_t = estimate_translation(opt_joints,
                                         gt_keypoints_2d_orig,
                                         focal_length=self.focal_length,
                                         img_size=self.options.img_res)
        #input()
        opt_joint_loss = self.smplify.get_fitting_loss(
            opt_pose, opt_betas, opt_cam_t, 0.5 * self.options.img_res *
            torch.ones(batch_size * 4, 2, device=self.device),
            gt_keypoints_2d_orig).mean(dim=-1)
        if self.options.run_smplify:
            pred_rotmat_hom = torch.cat([
                pred_rotmat.detach().view(-1, 3, 3),
                torch.tensor(
                    [0, 0, 1], dtype=torch.float32, device=self.device).view(
                        1, 3, 1).expand(batch_size * 4 * 24, -1, -1)
            ],
                                        dim=-1)
            pred_pose = rotation_matrix_to_angle_axis(
                pred_rotmat_hom).contiguous().view(batch_size * 4, -1)
            pred_pose[torch.isnan(pred_pose)] = 0.0
            #pred_pose_detach = pred_pose.detach()
            #pred_betas_detach = pred_betas.detach()
            #pred_cam_t_detach = pred_cam_t.detach()
            new_opt_vertices, new_opt_joints,\
            new_opt_pose, new_opt_betas,\
            new_opt_cam_t, new_opt_joint_loss = self.smplify(
                                        pred_pose.detach(), pred_betas.detach(),
                                        pred_cam_t.detach(),
                                        0.5 * self.options.img_res * torch.ones(batch_size*4, 2, device=self.device),
                                        gt_keypoints_2d_orig)
            new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1)
            # Will update the dictionary for the examples where the new loss is less than the current one
            update = (new_opt_joint_loss < opt_joint_loss)
            update1 = torch.cat((update, update, update, update), 0)
            opt_joint_loss[update] = new_opt_joint_loss[update]
            #print(opt_joints.size(),new_opt_joints.size())
            #input()
            opt_joints[update1, :] = new_opt_joints[update1, :]
            #print(opt_pose.size(),new_opt_pose.size())
            opt_betas[update1, :] = new_opt_betas[update1, :]
            opt_pose[update1, :] = new_opt_pose[update1, :]
            #print(i, opt_pose_mv[i])
            opt_vertices[update1, :] = new_opt_vertices[update1, :]
            opt_cam_t[update1, :] = new_opt_cam_t[update1, :]
        # Now we compute the losses on the four images
        # Replace the optimized parameters with the ground truth parameters, if available
        #for i in range(4):
        #print('Here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1')
        has_smpl1 = torch.cat((has_smpl, has_smpl, has_smpl, has_smpl), 0)
        opt_vertices[has_smpl1, :, :] = gt_vertices[has_smpl1, :, :]
        opt_pose[has_smpl1, :] = gt_pose[has_smpl1, :]
        opt_cam_t[has_smpl1, :] = gt_cam_t[has_smpl1, :]
        opt_joints[has_smpl1, :, :] = gt_model_joints[has_smpl1, :, :]
        opt_betas[has_smpl1, :] = gt_betas[has_smpl1, :]
        #print(opt_cam_t[0:batch_size],opt_cam_t[batch_size:2*batch_size],opt_cam_t[2*batch_size:3*batch_size],opt_cam_t[3*batch_size:4*batch_size])
        # Assess whether a fit is valid by comparing the joint loss with the threshold
        valid_fit1 = (opt_joint_loss < self.options.smplify_threshold).to(
            self.device)
        # Add the examples with GT parameters to the list of valid fits
        valid_fit = torch.cat(
            (valid_fit1, valid_fit1, valid_fit1, valid_fit1), 0) | has_smpl1

        loss_keypoints = self.keypoint_loss(pred_keypoints_2d, gt_keypoints_2d,
                                            0, 1)
        # gt_joints and loss_keypoints_3d are needed below: the 3D term is logged
        # and enters the total loss (even though its weight there is 0).
        gt_joints = torch.cat(
            (input_batch['pose_3d_0'], input_batch['pose_3d_1'],
             input_batch['pose_3d_2'], input_batch['pose_3d_3']), 0)
        loss_keypoints_3d = self.keypoint_3d_loss(
            pred_joints, gt_joints,
            torch.cat((has_pose_3d, has_pose_3d, has_pose_3d, has_pose_3d), 0))
        loss_regr_pose, loss_regr_betas = self.smpl_losses(
            pred_rotmat, pred_betas, opt_pose, opt_betas, valid_fit)
        loss_shape = self.shape_loss(pred_vertices, opt_vertices, valid_fit)
        #print(loss_shape_sum,loss_keypoints_sum,loss_keypoints_3d_sum,loss_regr_pose_sum,loss_regr_betas_sum)
        #input()
        loss_all = 0 * loss_shape +\
                   5. * loss_keypoints +\
                   0. * loss_keypoints_3d +\
                   loss_regr_pose + 0.001* loss_regr_betas +\
                   ((torch.exp(-pred_camera[:,0]*10)) ** 2 ).mean()

        loss_all *= 60
        #print(loss_all)

        # Do backprop
        self.optimizer.zero_grad()
        loss_all.backward()
        self.optimizer.step()
        output = {
            'pred_vertices': pred_vertices,
            'opt_vertices': opt_vertices,
            'pred_cam_t': pred_cam_t,
            'opt_cam_t': opt_cam_t
        }
        losses = {
            'loss': loss_all.detach().item(),
            'loss_keypoints': loss_keypoints.detach().item(),
            'loss_keypoints_3d': loss_keypoints_3d.detach().item(),
            'loss_regr_pose': loss_regr_pose.detach().item(),
            'loss_regr_betas': loss_regr_betas.detach().item(),
            'loss_shape': loss_shape.detach().item()
        }

        return output, losses

    def train_summaries(self, input_batch, output, losses):
        pred_vertices = output['pred_vertices']
        opt_vertices = output['opt_vertices']
        pred_cam_t = output['pred_cam_t']

        opt_cam_t = output['opt_cam_t']
        images_pred = self.renderer.visualize_tb(pred_vertices, pred_cam_t,
                                                 input_batch)
        images_opt = self.renderer.visualize_tb(opt_vertices, opt_cam_t,
                                                input_batch)

        self.summary_writer.add_image('pred_shape', images_pred,
                                      self.step_count)
        self.summary_writer.add_image('opt_shape', images_opt, self.step_count)
        for loss_name, val in losses.items():
            self.summary_writer.add_scalar(loss_name, val, self.step_count)
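Unlike the single-view trainer shown earlier, Trainer_li.train_step consumes four views per example. The dummy batch below sketches the key layout it reads; the per-view batch size B and the keypoint / 3D-pose shapes are assumptions inferred from the indexing above, not verified against the dataset code.

import torch

# Illustrative multi-view batch for Trainer_li.train_step (shapes assumed).
B = 16
input_batch = {'pose': torch.zeros(B, 72), 'betas': torch.zeros(B, 10),
               'has_smpl': torch.zeros(B), 'has_pose_3d': torch.zeros(B),
               'dataset_name': ['h36m'] * B, 'sample_index': torch.arange(B),
               'is_flipped': torch.zeros(B), 'rot_angle': torch.zeros(B)}
for v in range(4):
    input_batch['img_%d' % v] = torch.zeros(B, 3, 224, 224)
    input_batch['keypoints_%d' % v] = torch.zeros(B, 49, 3)
    input_batch['pose_3d_%d' % v] = torch.zeros(B, 24, 4)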
Ejemplo n.º 29
0
                      2))  # n_skirt, n_body
body_ind = np.argsort(dist, 1)[:, :K]
body_dist = np.sort(dist, 1)[:, :K]
# Inverse distance weighting
w = 1 / (body_dist**p)
w = w / np.sum(w, 1, keepdims=True)
n_skirt = len(skirt_v)
n_body = len(body_v)
skirt_weight = np.zeros([n_skirt, n_body], dtype=np.float32)
skirt_weight[np.tile(np.arange(n_skirt)[:, None], (1, K)), body_ind] = w
np.savez_compressed('C:/data/v3/skirt_weight.npz', w=skirt_weight)

exit()

# test
renderer = Renderer(512)
smpl = SMPLNP(gender='female', skirt=True)
smpl_torch = TorchSMPL4Garment('female')

import torch
disp = smpl_torch.forward_unpose_deformation(
    torch.from_numpy(np.zeros([1, 72])).float(),
    torch.from_numpy(np.zeros([1, 300])).float(),
    torch.from_numpy(skirt_v)[None].float())
disp = disp.detach().cpu().numpy()[0]

for t in np.linspace(0, 1, 20):
    theta = np.zeros([72])
    theta[5] = t
    theta[8] = -t
    body_v, gar_v = smpl(np.zeros([300]), theta, disp, 'skirt')
Ejemplo n.º 30
0
class Trainer(BaseTrainer):
    def init_fn(self):
        self.options.img_res = cfg.DANET.INIMG_SIZE
        self.options.heatmap_size = cfg.DANET.HEATMAP_SIZE
        self.train_ds = MixedDataset(self.options,
                                     ignore_3d=self.options.ignore_3d,
                                     is_train=True)

        self.model = DaNet(options=self.options,
                           smpl_mean_params=path_config.SMPL_MEAN_PARAMS).to(
                               self.device)
        self.smpl = self.model.iuv2smpl.smpl

        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=cfg.SOLVER.BASE_LR,
                                          weight_decay=0)

        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH

        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        # Load dictionary of fits of SPIN
        self.fits_dict = FitsDict(self.options, self.train_ds)

        # Create renderer
        try:
            self.renderer = Renderer(focal_length=self.focal_length,
                                     img_res=self.options.img_res,
                                     faces=self.smpl.faces)
        except Exception:
            print('Warning: no renderer available for visualization.')
            self.renderer = None

        self.decay_steps_ind = 1

    def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d,
                      openpose_weight, gt_weight):
        """ Compute 2D reprojection loss on the keypoints.
        The loss is weighted by the confidence.
        The available keypoints are different for each dataset.
        """
        conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
        conf[:, :25] *= openpose_weight
        conf[:, 25:] *= gt_weight
        loss = (conf * self.criterion_keypoints(
            pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean()
        return loss

    def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d,
                         has_pose_3d):
        """Compute 3D keypoint loss for the examples that 3D keypoint annotations are available.
        The loss is weighted by the confidence.
        """
        pred_keypoints_3d = pred_keypoints_3d[:, 25:, :]
        conf = gt_keypoints_3d[:, :, -1].unsqueeze(-1).clone()
        gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].clone()
        gt_keypoints_3d = gt_keypoints_3d[has_pose_3d == 1]
        conf = conf[has_pose_3d == 1]
        pred_keypoints_3d = pred_keypoints_3d[has_pose_3d == 1]
        if len(gt_keypoints_3d) > 0:
            gt_pelvis = (gt_keypoints_3d[:, 2, :] +
                         gt_keypoints_3d[:, 3, :]) / 2
            gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :]
            pred_pelvis = (pred_keypoints_3d[:, 2, :] +
                           pred_keypoints_3d[:, 3, :]) / 2
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :]
            return (conf * self.criterion_keypoints(pred_keypoints_3d,
                                                    gt_keypoints_3d)).mean()
        else:
            return torch.FloatTensor(1).fill_(0.).to(self.device)

    def shape_loss(self, pred_vertices, gt_vertices, has_smpl):
        """Compute per-vertex loss on the shape for the examples that SMPL annotations are available."""
        pred_vertices_with_shape = pred_vertices[has_smpl == 1]
        gt_vertices_with_shape = gt_vertices[has_smpl == 1]
        if len(gt_vertices_with_shape) > 0:
            return self.criterion_shape(pred_vertices_with_shape,
                                        gt_vertices_with_shape)
        else:
            return torch.FloatTensor(1).fill_(0.).to(self.device)

    def smpl_losses(self, pred_rotmat, pred_betas, gt_pose, gt_betas,
                    has_smpl):
        pred_rotmat_valid = pred_rotmat[has_smpl == 1]
        gt_rotmat_valid = batch_rodrigues(gt_pose.view(-1, 3)).view(
            -1, 24, 3, 3)[has_smpl == 1]
        pred_betas_valid = pred_betas[has_smpl == 1]
        gt_betas_valid = gt_betas[has_smpl == 1]
        if len(pred_rotmat_valid) > 0:
            loss_regr_pose = self.criterion_regr(pred_rotmat_valid,
                                                 gt_rotmat_valid)
            loss_regr_betas = self.criterion_regr(pred_betas_valid,
                                                  gt_betas_valid)
        else:
            loss_regr_pose = torch.FloatTensor(1).fill_(0.).to(self.device)
            loss_regr_betas = torch.FloatTensor(1).fill_(0.).to(self.device)
        return loss_regr_pose, loss_regr_betas

    def train_step(self, input_batch):

        # Learning rate decay
        if self.decay_steps_ind < len(cfg.SOLVER.STEPS) and input_batch[
                'step_count'] == cfg.SOLVER.STEPS[self.decay_steps_ind]:
            lr = self.optimizer.param_groups[0]['lr']
            lr_new = lr * cfg.SOLVER.GAMMA
            print('Decay the learning rate at step {} from {} to {}'.format(
                input_batch['step_count'], lr, lr_new))
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = lr_new
            lr = self.optimizer.param_groups[0]['lr']
            assert lr == lr_new
            self.decay_steps_ind += 1

        self.model.train()

        # Get data from the batch
        images = input_batch['img']  # input images
        gt_keypoints_2d = input_batch['keypoints']  # 2D keypoints
        gt_pose = input_batch['pose']  # SMPL pose parameters
        gt_betas = input_batch['betas']  # SMPL beta parameters
        gt_joints = input_batch['pose_3d']  # 3D pose
        has_smpl = input_batch['has_smpl'].byte()  # flag: SMPL parameters are valid
        has_pose_3d = input_batch['has_pose_3d'].byte()  # flag: 3D pose is valid
        is_flipped = input_batch['is_flipped']  # flag: image was flipped during data augmentation
        rot_angle = input_batch['rot_angle']  # rotation angle used for data augmentation
        dataset_name = input_batch['dataset_name']  # name of the dataset the image comes from
        indices = input_batch['sample_index']  # index of the example inside its dataset
        batch_size = images.shape[0]

        # Get GT vertices and model joints
        # Note that gt_model_joints is different from gt_joints as it comes from SMPL
        gt_out = self.smpl(betas=gt_betas,
                           body_pose=gt_pose[:, 3:],
                           global_orient=gt_pose[:, :3])
        gt_model_joints = gt_out.joints
        gt_vertices = gt_out.vertices

        # Get current pseudo labels (final fits of SPIN) from the dictionary
        opt_pose, opt_betas = self.fits_dict[(dataset_name, indices.cpu(),
                                              rot_angle.cpu(),
                                              is_flipped.cpu())]
        opt_pose = opt_pose.to(self.device)
        opt_betas = opt_betas.to(self.device)

        # Replace extreme betas with zero betas
        opt_betas[(opt_betas.abs() > 3).any(dim=-1)] = 0.
        # Replace the optimized parameters with the ground truth parameters, if available
        opt_pose[has_smpl, :] = gt_pose[has_smpl, :]
        opt_betas[has_smpl, :] = gt_betas[has_smpl, :]

        opt_output = self.smpl(betas=opt_betas,
                               body_pose=opt_pose[:, 3:],
                               global_orient=opt_pose[:, :3])
        opt_vertices = opt_output.vertices
        opt_joints = opt_output.joints

        # De-normalize 2D keypoints from [-1,1] to pixel space
        gt_keypoints_2d_orig = gt_keypoints_2d.clone()
        gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * (
            gt_keypoints_2d_orig[:, :, :-1] + 1)

        # Estimate camera translation given the model joints and 2D keypoints
        # by minimizing a weighted least squares loss
        gt_cam_t = estimate_translation(gt_model_joints,
                                        gt_keypoints_2d_orig,
                                        focal_length=self.focal_length,
                                        img_size=self.options.img_res)

        opt_cam_t = estimate_translation(opt_joints,
                                         gt_keypoints_2d_orig,
                                         focal_length=self.focal_length,
                                         img_size=self.options.img_res)

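        # Decide which fits can be trusted: those flagged valid in the fits dictionary and/or examples with ground-truth SMPL parameters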
        if self.options.train_data in ['h36m_coco_itw']:
            valid_fit = self.fits_dict.get_vaild_state(dataset_name,
                                                       indices.cpu()).to(
                                                           self.device)
            valid_fit = valid_fit | has_smpl
        else:
            valid_fit = has_smpl

        # Feed images in the network to predict camera and SMPL parameters
        input_batch['opt_pose'] = opt_pose
        input_batch['opt_betas'] = opt_betas
        input_batch['valid_fit'] = valid_fit

        input_batch['dp_dict'] = {
            k: v.to(self.device) if isinstance(v, torch.Tensor) else v
            for k, v in input_batch['dp_dict'].items()
        }
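        # IUV supervision is available for all datasets except DensePose-COCO, and only where the SMPL fit is valid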
        has_iuv = torch.tensor([dn not in ['dp_coco'] for dn in dataset_name],
                               dtype=torch.uint8).to(self.device)
        has_iuv = has_iuv & valid_fit
        input_batch['has_iuv'] = has_iuv
        has_dp = input_batch['has_dp']
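        # Build 2D SMPL keypoint targets by projecting the pseudo ground-truth SMPL joints into the image and
        # re-normalizing to [-1, 1]; where DensePose annotations exist, the provided 2D keypoints are used instead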
        target_smpl_kps = torch.zeros(
            (batch_size, 24, 3)).to(opt_output.smpl_joints.device)
        target_smpl_kps[:, :, :2] = perspective_projection(
            opt_output.smpl_joints.detach().clone(),
            rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(
                batch_size, -1, -1),
            translation=opt_cam_t,
            focal_length=self.focal_length,
            camera_center=torch.zeros(batch_size, 2, device=self.device) +
            (0.5 * self.options.img_res))
        target_smpl_kps[:, :, :2] = target_smpl_kps[:, :, :2] / (
            0.5 * self.options.img_res) - 1
        target_smpl_kps[has_iuv == 1, :, 2] = 1
        target_smpl_kps[has_dp == 1] = input_batch['smpl_2dkps'][has_dp == 1]
        input_batch['target_smpl_kps'] = target_smpl_kps  # [B, 24, 3]
        input_batch['target_verts'] = opt_vertices.detach().clone()  # [B, 6890, 3]

        # camera translation for neural renderer
        gt_cam_t_nr = opt_cam_t.detach().clone()
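        # Convert the camera translation to the weak-perspective (s, tx, ty) format used by the renderer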
        gt_camera = torch.zeros(gt_cam_t_nr.shape).to(gt_cam_t_nr.device)
        gt_camera[:, 1:] = gt_cam_t_nr[:, :2]
        gt_camera[:, 0] = (2. * self.focal_length /
                           self.options.img_res) / gt_cam_t_nr[:, 2]
        input_batch['target_cam'] = gt_camera

        # Do forward
        danet_return_dict = self.model(input_batch)

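        # Sum all loss terms returned by the network and keep each term separately for logging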
        loss_total = 0
        losses_dict = {}
        for loss_key in danet_return_dict['losses']:
            loss_total += danet_return_dict['losses'][loss_key]
            losses_dict['loss_{}'.format(loss_key)] = danet_return_dict[
                'losses'][loss_key].detach().item()

        # Do backprop
        self.optimizer.zero_grad()
        loss_total.backward()
        self.optimizer.step()

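        # During pretraining no mesh prediction is logged for visualization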
        if input_batch['pretrain_mode']:
            pred_vertices = None
            pred_cam_t = None
        else:
            pred_vertices = danet_return_dict['prediction']['vertices'].detach()
            pred_cam_t = danet_return_dict['prediction']['cam_t'].detach()

        # Pack output arguments for tensorboard logging
        output = {
            'pred_vertices': pred_vertices,
            'opt_vertices': opt_vertices,
            'pred_cam_t': pred_cam_t,
            'opt_cam_t': opt_cam_t,
            'visualization': danet_return_dict['visualization']
        }

        losses_dict.update({'loss_total': loss_total.detach().item()})

        return output, losses_dict

    def train_summaries(self, input_batch, output, losses):
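        """Write every scalar loss to TensorBoard."""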
        for loss_name, val in losses.items():
            self.summary_writer.add_scalar(loss_name, val, self.step_count)

    def visualize(self, input_batch, output, losses):
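        """Render the optimized and predicted meshes and intermediate outputs as TensorBoard images."""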
        images = input_batch['img']
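        # Undo the ImageNet normalization (multiply by std, add mean) so the images can be displayed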
        images = images * torch.tensor(
            [0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
        images = images + torch.tensor(
            [0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)

        pred_vertices = output['pred_vertices']
        opt_vertices = output['opt_vertices']
        pred_cam_t = output['pred_cam_t']
        opt_cam_t = output['opt_cam_t']
        if self.renderer is not None:
            images_opt = self.renderer.visualize_tb(opt_vertices, opt_cam_t,
                                                    images)
            self.summary_writer.add_image('opt_shape', images_opt,
                                          self.step_count)
            if pred_vertices is not None:
                images_pred = self.renderer.visualize_tb(
                    pred_vertices, pred_cam_t, images)
                self.summary_writer.add_image('pred_shape', images_pred,
                                              self.step_count)

        for key_name in [
                'pred_uv', 'gt_uv', 'part_uvi_pred', 'part_uvi_gt',
                'skps_hm_pred', 'skps_hm_pred_soft', 'skps_hm_gt',
                'skps_hm_gt_soft'
        ]:
            if key_name in output['visualization']:
                vis_uv_raw = output['visualization'][key_name]
                if key_name in ['pred_uv', 'gt_uv']:
                    iuv = F.interpolate(vis_uv_raw,
                                        scale_factor=4.,
                                        mode='nearest')
                    img_iuv = images.clone()
                    img_iuv[iuv > 0] = iuv[iuv > 0]
                    vis_uv = make_grid(img_iuv, padding=1, pad_value=1)
                else:
                    vis_uv = make_grid(vis_uv_raw, padding=1, pad_value=1)
                self.summary_writer.add_image(key_name, vis_uv,
                                              self.step_count)

        if 'target_smpl_kps' in input_batch:
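            # Convert the target keypoints from normalized [-1, 1] coordinates back to pixel coordinates for drawing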
            smpl_kps = input_batch['target_smpl_kps'].detach()
            smpl_kps[:, :, :2] *= images.size(-1) / 2.
            smpl_kps[:, :, :2] += images.size(-1) / 2.
            img_smpl_hm = images.detach().clone()
            img_with_smpljoints = vis_utils.vis_batch_image_with_joints(
                img_smpl_hm.data,
                smpl_kps.cpu().numpy(),
                np.ones((smpl_kps.shape[0], smpl_kps.shape[1], 1)))
            img_with_smpljoints = np.transpose(img_with_smpljoints, (2, 0, 1))
            self.summary_writer.add_image('stn_centers_gt',
                                          img_with_smpljoints, self.step_count)

        if 'stn_kps_pred' in output['visualization']:
            smpl_kps = output['visualization']['stn_kps_pred']
            smpl_kps[:, :, :2] *= images.size(-1) / 2.
            smpl_kps[:, :, :2] += images.size(-1) / 2.
            img_smpl_hm = images.detach().clone()
            if 'skps_hm_gt' in output['visualization']:
                smpl_hm = output['visualization']['skps_hm_gt'].expand(
                    -1, 3, -1, -1)
                smpl_hm = F.interpolate(smpl_hm,
                                        scale_factor=images.size(-1) /
                                        smpl_hm.size(-1))
                img_smpl_hm[smpl_hm > 0.1] = smpl_hm[smpl_hm > 0.1]
            img_with_smpljoints = vis_utils.vis_batch_image_with_joints(
                img_smpl_hm.data,
                smpl_kps.cpu().numpy(),
                np.ones((smpl_kps.shape[0], smpl_kps.shape[1], 1)))
            img_with_smpljoints = np.transpose(img_with_smpljoints, (2, 0, 1))
            self.summary_writer.add_image('stn_centers_pred',
                                          img_with_smpljoints, self.step_count)
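
    # Usage sketch (an assumption for illustration, not part of the original example):
    # the enclosing trainer class and a PyTorch DataLoader named `train_loader` are
    # presumed to be constructed elsewhere.
    #
    #     trainer = Trainer(options)              # hypothetical constructor
    #     for step, batch in enumerate(train_loader):
    #         batch['step_count'] = step
    #         batch['pretrain_mode'] = False
    #         output, losses = trainer.train_step(batch)
    #         trainer.train_summaries(batch, output, losses)
    #         trainer.visualize(batch, output, losses)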