    def __init__(self, args):
        super(SeperateFPAndNS, self).__init__(args)
        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.gpu_ids = args.gpu_ids
        self.all_obj_names = args.object_list

        # configs w.r.t. two losses
        self.joint_two_losses = args.joint_two_losses
        self.loss1_or_loss2 = None
        if args.loss1_w < 0.00001:
            self.loss1_or_loss2 = False  # update loss2 only
        elif args.loss2_w < 0.00001:
            self.loss1_or_loss2 = True  # update loss1 only
        self.ns_optim, self.fp_optim = None, None

        # see gradients for debugging
        self.vis_grad = args.vis_grad
        self.grad_vis = None

        self.train_res = args.train_res or self.vis_grad

        self.fp = ForcePredictor(args)
        self.ns = NeuralForceSimulator(args=args)

        # deprecated: the FP and NS submodules build their own copies of these keypoint tensors
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
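
The `loss1_or_loss2` flag above is three-valued: `None` means both losses stay active, `True` means only loss1 is updated, and `False` means only loss2. A minimal sketch of how such a flag could gate the two optimizers once they are created; the `step_with_gate` helper is hypothetical, and which optimizer pairs with which loss is an assumption:

def step_with_gate(loss1, loss2, fp_optim, ns_optim, loss1_or_loss2):
    # hypothetical helper: route the backward pass to the matching optimizer(s)
    if loss1_or_loss2 is True:       # update loss1 only
        fp_optim.zero_grad()
        loss1.backward()
        fp_optim.step()
    elif loss1_or_loss2 is False:    # update loss2 only
        ns_optim.zero_grad()
        loss2.backward()
        ns_optim.step()
    else:                            # joint update with both losses
        fp_optim.zero_grad()
        ns_optim.zero_grad()
        (loss1 + loss2).backward()
        fp_optim.step()
        ns_optim.step()
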
    def __init__(self, args):
        super(NeuralForceSimulator, self).__init__()
        self.clean_force = False
        # neural force simulator
        self.only_first_img_feature = True
        self.vis_grad = args.vis_grad
        self.train_res = args.train_res or self.vis_grad
        self.hidden_size = 512
        self.image_feature_dim = 512
        self.num_layers = 3
        self.sequence_length = args.sequence_length
        self.object_feature_size = 512
        self.environment = args.instance_environment
        self.number_of_cp = args.number_of_cp
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        if not self.train_res:
            self.feature_extractor.eval()
        self.use_lstm = args.lstm
        self.norm_position = False
        if self.use_lstm:
            self.one_ns_layer = NSLSTM(
                hidden_size=self.hidden_size,
                layer_norm=False,
                image_feature_dim=self.image_feature_dim,
                norm_position=self.norm_position)
        else:
            self.one_ns_layer = NSWithImageFeature(
                hidden_size=self.hidden_size,
                layer_norm=False,
                image_feature_dim=self.image_feature_dim,
                norm_position=self.norm_position)
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)
        # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False) for obj_name in self.all_obj_names}
        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
        self.ns_ratio, self.phy_ratio, self.gt_ratio = 1, 1, 0
        total_r = self.ns_ratio + self.phy_ratio + self.gt_ratio
        self.ns_ratio /= total_r
        self.phy_ratio /= total_r
        self.gt_ratio /= total_r
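
The last lines normalize the three ratios to sum to 1, so the defaults (1, 1, 0) become (0.5, 0.5, 0.0). Assuming they weight a convex blend of the simulator prediction, a physics rollout, and ground truth (the blend itself is an inference; `blend` is hypothetical):

import torch

def blend(ns_pred, phy_pred, gt, ns_ratio=0.5, phy_ratio=0.5, gt_ratio=0.0):
    # hypothetical convex combination weighted by the normalized ratios
    return ns_ratio * ns_pred + phy_ratio * phy_pred + gt_ratio * gt

ns_ratio, phy_ratio, gt_ratio = 1, 1, 0
total_r = ns_ratio + phy_ratio + gt_ratio
print(ns_ratio / total_r, phy_ratio / total_r, gt_ratio / total_r)  # 0.5 0.5 0.0
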
    def __init__(self, args):
        super(PredictInitPoseAndForce, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.relu = nn.LeakyReLU()
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()

        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        # self.input_feature_size = self.image_feature_size + self.object_feature_size
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)

        predict_initial_pose_size = torch.Tensor([(2 + 3) * 10, 100, 3 + 4])
        self.predict_initial_pose = input_embedding_net(predict_initial_pose_size.long().tolist(), dropout=args.dropout_ratio)

        input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
        state_embed_size = torch.Tensor([EnvState.total_size + self.cp_feature_size, 100, self.object_feature_size])
        self.state_embed = input_embedding_net(state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7, hidden_size=self.hidden_size, batch_first=True, num_layers=self.num_layers)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2, hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor([self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(forces_directions_decoder_size.long().tolist(), dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'batch sizes > 1 have not been implemented yet because of the environment'

        assert self.number_of_cp == 5  # for five fingers

        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
            global DEFAULT_IMAGE_SIZE
            DEFAULT_IMAGE_SIZE = DEFAULT_IMAGE_SIZE.cuda()
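
The encoder's `input_size=self.hidden_size + 64 * 7 * 7` reflects how its inputs are assembled: ResNet-18 without its fc head emits a 512-channel 7x7 feature map for a 224x224 image, `image_embed` squeezes it to 64 channels, and the flattened 64 * 7 * 7 = 3136 values are concatenated with a 512-d embedding, giving 3648 per step. A standalone shape check (plain torchvision; the 1x1 conv is only a stand-in for `combine_block_w_do`):

import torch
import torch.nn as nn
from torchvision.models import resnet18

body = nn.Sequential(*list(resnet18(pretrained=False).children())[:-2])  # drop avgpool + fc
embed = nn.Conv2d(512, 64, kernel_size=1)  # stand-in for image_embed

fmap = embed(body(torch.randn(1, 3, 224, 224)))  # -> (1, 64, 7, 7)
flat = fmap.flatten(1)                           # -> (1, 3136)
print(flat.shape[1] + 512)                       # 3648, the LSTM input_size
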
    def __init__(self, args):
        super(NoForceOnlyCPModel, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        feature_extractors = {'resnet18': resnet18, 'resnet50': resnet50}
        self.feature_extractor = feature_extractors[args.feature_extractor](
            pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()
        if args.feature_extractor == 'resnet18':
            self.image_feature_size = 512
        else:
            self.image_feature_size = 2048
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        self.image_embed = combine_block_w_do(self.image_feature_size, 64,
                                              args.dropout_ratio)
        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.lstm_encoder = nn.LSTM(input_size=64 * 7 * 7 + 512,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
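
The backbone switch is why `image_embed` takes `self.image_feature_size` instead of a hard-coded 512: ResNet-18's final conv stage has 512 channels while ResNet-50's has 2048. A self-contained check of the two widths:

import torch
import torch.nn as nn
from torchvision.models import resnet18, resnet50

for name, ctor in {'resnet18': resnet18, 'resnet50': resnet50}.items():
    body = nn.Sequential(*list(ctor(pretrained=False).children())[:-2])
    print(name, body(torch.randn(1, 3, 224, 224)).shape)
# resnet18 -> (1, 512, 7, 7); resnet50 -> (1, 2048, 7, 7)
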
    def __init__(self, args):
        super(BaselineRegressForce, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.relu = nn.LeakyReLU()
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()

        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
        contact_point_embed_size = torch.Tensor([3 * 5, 100, self.object_feature_size])
        self.contact_point_embed = input_embedding_net(contact_point_embed_size.long().tolist(), dropout=args.dropout_ratio)
        self.lstm_encoder = nn.LSTM(input_size=64 * 7 * 7 + 512, hidden_size=self.hidden_size, batch_first=True, num_layers=self.num_layers)

        force_decoder_size = torch.Tensor([self.hidden_size * 2, 100, (3) * self.number_of_cp])
        self.force_decoder = input_embedding_net(force_decoder_size.long().tolist(), dropout=args.dropout_ratio)
        # train: batched inputs with break_batch == 1; eval: single-sample batches
        assert (args.mode == 'train' and args.batch_size > 1 and args.break_batch == 1) \
            or (args.mode != 'train' and args.batch_size == 1)

        self.train_mode = (args.mode == 'train')

        assert self.number_of_cp == 5  # for five fingers

        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

        if not self.train_mode:
            # the object-pose metrics below are only added at test time
            BaselineRegressForce.metric += [
                metrics.ObjKeypointMetric,
                metrics.ObjRotationMetric,
                metrics.ObjPositionMetric,
            ]
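
`BaselineRegressForce.metric += [...]` augments a class-level list. If `metric` is inherited from a base class (not shown in this snippet), `+=` mutates that base list in place before rebinding the name on the subclass, so sibling models would see the extra test-time metrics too. A self-contained illustration of the pitfall and a safer alternative:

class Base:
    metric = ['loss']

class Leaky(Base):
    pass

class Safe(Base):
    pass

Leaky.metric += ['keypoint']              # __iadd__ extends Base.metric in place
Safe.metric = Safe.metric + ['rotation']  # builds a new list bound on Safe only

print(Base.metric)  # ['loss', 'keypoint']  <- leaked via Leaky
print(Safe.metric)  # ['loss', 'keypoint', 'rotation']
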
    def __init__(self, args):
        super(NoModelGTForceBaseline, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.dummy_layer = nn.Linear(10, 10)

        self.this_loss_func = self.loss(args)
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
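
This baseline has no learnable networks; the `nn.Linear(10, 10)` appears to exist only so `parameters()` is non-empty for the surrounding optimizer machinery (an assumption, since the original leaves it uncommented). A minimal illustration:

import torch.nn as nn
import torch.optim as optim

class WithDummy(nn.Module):
    def __init__(self):
        super().__init__()
        self.dummy_layer = nn.Linear(10, 10)  # unused in forward, but provides parameters

# optim.Adam(nn.Module().parameters())  # would raise: empty parameter list
optim.Adam(WithDummy().parameters())    # constructs fine
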
    def __init__(self, args):
        super(BatchCPHeatmapModel, self).__init__(args)

        self.use_syn = args.use_syn
        self.ori_w, self.ori_h = 1920, 1080
        self.environment_layer = BatchCPGradientLayer
        self.loss_function = args.loss
        self.relu = nn.LeakyReLU()
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()

        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        plane_dim = 1024 if self.use_syn else 512
        self.image_embed = combine_block_w_do(plane_dim, 64,
                                              args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            plane_dim, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            EnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'we do not use a batch size larger than 1; instead, gradients are accumulated over several steps.'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
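
`plane_dim` doubling from 512 to 1024 when `use_syn` is set suggests that real and synthetic 512-channel feature maps are concatenated along the channel axis before `image_embed` (an inference from the sizes; the forward pass is not shown in this snippet). A shape sketch under that assumption:

import torch

use_syn = True
real_feat = torch.randn(1, 512, 7, 7)
syn_feat = torch.randn(1, 512, 7, 7)

planes = torch.cat([real_feat, syn_feat], dim=1) if use_syn else real_feat
plane_dim = 1024 if use_syn else 512
assert planes.shape[1] == plane_dim  # image_embed must be built for this width
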
    def __init__(self, args):
        super(ForcePredictor, self).__init__()
        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.sequence_length = args.sequence_length
        self.number_of_cp = args.number_of_cp
        self.use_gt_cp = args.use_gt_cp
        self.vis_grad = args.vis_grad
        self.train_res = args.train_res or self.vis_grad
        self.grad_value = None

        # force predictor networks.
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        if not self.train_res:
            self.feature_extractor.eval()
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            512, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            NoGradEnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'batch sizes > 1 have not been implemented yet because of the environment'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
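
The decoder is a single `nn.LSTMCell` whose `input_size` is `hidden_size * 2`, i.e. two 512-d vectors per step. A hedged unroll sketch; pairing the encoder context with the previous decoder hidden state is an assumption about the forward pass, which this snippet does not show:

import torch
import torch.nn as nn

hidden_size, seq_len = 512, 10
decoder = nn.LSTMCell(input_size=hidden_size * 2, hidden_size=hidden_size)

context = torch.randn(1, hidden_size)  # e.g. final encoder hidden state
h = torch.zeros(1, hidden_size)
c = torch.zeros(1, hidden_size)
outputs = []
for _ in range(seq_len):
    h, c = decoder(torch.cat([context, h], dim=1), (h, c))
    outputs.append(h)  # each step's hidden state feeds the force decoder MLP
print(torch.stack(outputs, dim=1).shape)  # (1, 10, 512)
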
    def __init__(self, args):
        super(JointNS, self).__init__(args)
        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3

        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids
        self.all_obj_names = args.object_list
        self.use_gt_cp = args.use_gt_cp
        self.clean_force = True

        # configs w.r.t. two losses
        self.joint_two_losses = args.joint_two_losses
        self.loss1_or_loss2 = None
        if args.loss1_w < 0.00001:
            self.loss1_or_loss2 = False  # update loss2 only
        elif args.loss2_w < 0.00001:
            self.loss1_or_loss2 = True  # update loss1 only
        self.loss1_optim, self.loss2_optim, self.joint_optim = None, None, None

        # neural force simulator
        self.use_image_feature = True
        if not self.use_image_feature:
            self.one_ns_layer = MLPNS(hidden_size=64, layer_norm=False)
        else:
            self.one_ns_layer = NSWithImageFeature(hidden_size=64,
                                                   layer_norm=False,
                                                   image_feature_dim=512)
        # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False) for obj_name in self.all_obj_names}

        # force predictor networks.
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        self.feature_extractor.eval()
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            512, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            NoGradEnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'batch sizes > 1 have not been implemented yet because of the environment'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

        self.force_predictor_modules = [
            self.feature_extractor, self.image_embed,
            self.contact_point_image_embed, self.input_object_embed,
            self.contact_point_input_object_embed, self.state_embed,
            self.lstm_encoder, self.contact_point_encoder,
            self.contact_point_decoder, self.forces_directions_decoder
        ]

        # see gradients for debugging
        self.vis_grad = args.vis_grad
        self.grad_vis = None

        self.train_res = args.train_res or self.vis_grad
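
Collecting the force-predictor submodules in one list makes it straightforward to build the per-loss optimizers declared earlier (`loss1_optim`, `loss2_optim`). A sketch of assembling such a parameter group (Adam and the learning rate are assumptions; stand-in modules replace the real ones):

import itertools
import torch.nn as nn
import torch.optim as optim

force_predictor_modules = [nn.Linear(8, 8), nn.LSTM(8, 8)]  # stand-ins

fp_params = itertools.chain.from_iterable(
    m.parameters() for m in force_predictor_modules)
loss1_optim = optim.Adam(fp_params, lr=1e-4)  # steps only the force predictor
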