Example #1
    def __init__(self, args):
        super(ActionReprModel, self).__init__(args)
        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus
        self.feature_extractor = FeatureLearnerModule(args)
        self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)


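        # pointwise conv block with dropout, reducing the 512-channel features to 64 channels (flattened to 64 * 7 * 7 for the LSTM below)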
        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

        self.number_of_layers = 3

        self.lstm = nn.LSTM(64 * 7 * 7, self.hidden_size, batch_first=True, num_layers=self.number_of_layers)

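        # MLP head (hidden_size -> 200 -> num_classes) mapping the LSTM output to action class scores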
        action_unembed_size = torch.Tensor([self.hidden_size, 200, self.num_classes])
        self.action_unembed = input_embedding_net(action_unembed_size.long().tolist(), dropout=args.dropout)

        assert self.input_length == self.sequence_length and self.output_length == 1

    def __init__(self, args):
        super(MoCoIMUModel, self).__init__(args)

        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus

        self.feature_extractor = FeatureLearnerModule(args)
        self.moco_feature_extractor = FeatureLearnerModule(args)

        self.feature_linear = nn.Linear(512, 128)
        self.moco_feature_linear = nn.Linear(512, 128)

        moment_update(self.feature_extractor, self.moco_feature_extractor,
                      0.0)  # copy feature extractor weights into the momentum (moco) encoder
        moment_update(self.feature_linear, self.moco_feature_linear,
                      0.0)  # copy the linear projection weights into its momentum counterpart

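        # momentum coefficient for updating the moco (key) encoder and the size of the negative-key queue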
        self.alpha = 0.999
        queue_size = 16384

        assert self.input_length == self.sequence_length == self.output_length

        imu_unembed_size = torch.Tensor(
            [self.hidden_size, 100, self.num_imus * 1])
        self.imu_unembed = input_embedding_net(
            imu_unembed_size.long().tolist(), dropout=args.dropout)

        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

        self.imu_embed_lstm = nn.LSTM(64 * 7 * 7,
                                      self.hidden_size,
                                      batch_first=True,
                                      num_layers=3)

        self.imu_decoder_lstm = nn.LSTM(64 * 7 * 7,
                                        self.hidden_size,
                                        batch_first=True,
                                        num_layers=3)

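        # MoCo-style memory of negative keys: 128-d features (matching feature_linear), temperature 0.07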
        self.contrast = MemoryMoCo(128, queue_size, 0.07)
        if args.gpu_ids != -1:
            self.contrast = self.contrast.cuda()

    def __init__(self, args):
        super(NeuralForceSimulator, self).__init__()
        self.clean_force = False
        # neural force simulator
        self.only_first_img_feature = True
        self.vis_grad = args.vis_grad
        self.train_res = args.train_res or self.vis_grad
        self.hidden_size = 512
        self.image_feature_dim = 512
        self.num_layers = 3
        self.sequence_length = args.sequence_length
        self.object_feature_size = 512
        self.environment = args.instance_environment
        self.number_of_cp = args.number_of_cp
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        if not self.train_res:
            self.feature_extractor.eval()
        self.use_lstm = args.lstm
        self.norm_position = False
        if self.use_lstm:
            self.one_ns_layer = NSLSTM(
                hidden_size=self.hidden_size,
                layer_norm=False,
                image_feature_dim=self.image_feature_dim,
                norm_position=self.norm_position)
        else:
            self.one_ns_layer = NSWithImageFeature(
                hidden_size=self.hidden_size,
                layer_norm=False,
                image_feature_dim=self.image_feature_dim,
                norm_position=self.norm_position)
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)
        # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False) for obj_name in self.all_obj_names}
        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
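        # mixing weights for the neural-simulator (ns), physics (phy), and ground-truth (gt) terms; normalized below so they sum to 1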
        self.ns_ratio, self.phy_ratio, self.gt_ratio = 1, 1, 0
        total_r = self.ns_ratio + self.phy_ratio + self.gt_ratio
        self.ns_ratio = self.ns_ratio / total_r
        self.phy_ratio = self.phy_ratio / total_r
        self.gt_ratio = self.gt_ratio / total_r
Example #4
    def __init__(self, args):
        super(PredictInitPoseAndForce, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.relu = nn.LeakyReLU()
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()

        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        # self.input_feature_size = self.image_feature_size + self.object_feature_size
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)

        predict_initial_pose_size = torch.Tensor([(2 + 3) * 10, 100, 3 + 4])
        self.predict_initial_pose = input_embedding_net(predict_initial_pose_size.long().tolist(), dropout=args.dropout_ratio)

        input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
        state_embed_size = torch.Tensor([EnvState.total_size + self.cp_feature_size, 100, self.object_feature_size])
        self.state_embed = input_embedding_net(state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7, hidden_size=self.hidden_size, batch_first=True, num_layers=self.num_layers)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2, hidden_size=self.hidden_size)

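        # MLP decoding each decoder step into a 3-d force direction for each of the 5 contact points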
        forces_directions_decoder_size = torch.Tensor([self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(forces_directions_decoder_size.long().tolist(), dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'batch_size > 1 has not been implemented yet, because of the environment'

        assert self.number_of_cp == 5  # for five fingers

        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
            global DEFAULT_IMAGE_SIZE
            DEFAULT_IMAGE_SIZE = DEFAULT_IMAGE_SIZE.cuda()

    def __init__(self, args):
        super(NoForceOnlyCPModel, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        feature_extractors = {'resnet18': resnet18, 'resnet50': resnet50}
        self.feature_extractor = feature_extractors[args.feature_extractor](
            pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()
        if args.feature_extractor == 'resnet18':
            self.image_feature_size = 512
        else:
            self.image_feature_size = 2048
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        self.image_embed = combine_block_w_do(self.image_feature_size, 64,
                                              args.dropout_ratio)
        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.lstm_encoder = nn.LSTM(input_size=64 * 7 * 7 + 512,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

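        # MLP decoding each encoder hidden state into a 3-d contact point per finger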
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

    def __init__(self, args):
        super(CurrentMoveFromGazeImgModel, self).__init__(args)

        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus
        self.feature_extractor = FeatureLearnerModule(args)
        self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)

        self.input_feature_type = args.input_feature_type[0]

        imu_moves_unembed_size = torch.Tensor(
            [self.hidden_size, 100, self.num_imus * 1])
        self.imu_moves_unembed = input_embedding_net(
            imu_moves_unembed_size.long().tolist(), dropout=args.dropout)

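        # embeds the 2-d gaze location into a hidden_size feature vector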
        gaze_embed_size = torch.Tensor([2, 100, self.hidden_size])
        self.gaze_embed = input_embedding_net(gaze_embed_size.long().tolist(),
                                              dropout=args.dropout)

        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

        self.lstm = nn.LSTM(64 * 7 * 7 + 512,
                            self.hidden_size,
                            batch_first=True,
                            num_layers=3)

        self.decoder_lstm = nn.LSTM(64 * 7 * 7 + 512,
                                    self.hidden_size,
                                    batch_first=True,
                                    num_layers=3)

        assert self.input_length == self.sequence_length and self.input_length == self.output_length
Example #7
    def __init__(self, args):
        super(BaselineRegressForce, self).__init__(args)
        self.environment_layer = EnvWHumanCpFiniteDiffFast
        self.loss_function = args.loss
        self.relu = nn.LeakyReLU()
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()

        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        input_object_embed_size = torch.Tensor([3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(input_object_embed_size.long().tolist(), dropout=args.dropout_ratio)
        contact_point_embed_size = torch.Tensor([3 * 5, 100, self.object_feature_size])
        self.contact_point_embed = input_embedding_net(contact_point_embed_size.long().tolist(), dropout=args.dropout_ratio)
        self.lstm_encoder = nn.LSTM(input_size=64 * 7 * 7 + 512, hidden_size=self.hidden_size, batch_first=True, num_layers=self.num_layers)

        force_decoder_size = torch.Tensor([self.hidden_size * 2, 100, (3) * self.number_of_cp])
        self.force_decoder = input_embedding_net(force_decoder_size.long().tolist(), dropout=args.dropout_ratio)
        assert (args.mode == 'train' and args.batch_size > 1 and args.break_batch == 1) or (args.mode != 'train' and args.batch_size == 1)

        self.train_mode = (args.mode == 'train')

        assert self.number_of_cp == 5  # for five fingers

        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

        if not self.train_mode:
            BaselineRegressForce.metric += [
                metrics.ObjKeypointMetric,  # added only at test time
                metrics.ObjRotationMetric,
                metrics.ObjPositionMetric,
            ]
Example #8
    def __init__(self, args):
        super(WalkableModel, self).__init__(args)
        assert args.dropout == 0

        self.loss_function = args.loss

        self.fixed_feature_weights = args.fixed_feature_weights

        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

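        # upsampling decoder (five x2 upshuffle stages) producing a 2-channel full-resolution walkability map from the 7 x 7 features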
        self.depth_up1 = upshuffle(64, 256, 2, kernel_size=3, stride=1, padding=1)
        self.depth_up2 = upshuffle(256, 128, 2, kernel_size=3, stride=1, padding=1)
        self.depth_up3 = upshuffle(128, 64, 2, kernel_size=3, stride=1, padding=1)
        self.depth_up4 = upshuffle(64, 64, 2, kernel_size=3, stride=1, padding=1)
        self.depth_up5 = upshufflenorelu(64, 2, 2)

        self.feature_extractor = FeatureLearnerModule(args)
Example #9
    def __init__(self, args):
        super(VindModel, self).__init__(args)

        assert args.detach_level == 0

        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus
        self.feature_extractor = FeatureLearnerModule(args)
        self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)

        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

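        # conv / pooling stack that encodes a single-channel input down to a 64 x 7 x 7 feature map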
        self.conv1 = nn.Conv2d(1, 64, 7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 256, 7, stride=2, padding=3)
        self.bn2 = nn.BatchNorm2d(256)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(256, 64, 7, stride=2, padding=3)
        self.bn3 = nn.BatchNorm2d(64)

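        # classification head taking two concatenated 64 * 7 * 7 feature maps down to num_classes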
        vind_unembed_size = torch.Tensor(
            [64 * 7 * 7 * 2, 64 * 7 * 7, self.hidden_size, self.num_classes])
        self.vind_unembed = input_embedding_net(
            vind_unembed_size.long().tolist(), dropout=args.dropout)

        assert self.input_length == self.sequence_length == self.output_length == 1

    def __init__(self, args):
        super(AutoEncoderModel, self).__init__(args)

        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus
        self.feature_extractor = FeatureLearnerModule(args)

        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

        self.reconst_resolution = args.reconst_resolution
        assert self.reconst_resolution == 224
        self.feature_sizes = [7, 14, 28, 56, 112, self.reconst_resolution]
        self.upscale_factor = [
            int(self.feature_sizes[i + 1] / self.feature_sizes[i])
            for i in range(len(self.feature_sizes) - 1)
        ]


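        # decoder: five upshuffle stages reconstructing a 3-channel 224 x 224 image from the 7 x 7 feature map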
        self.up1 = upshuffle(64, 256, self.upscale_factor[0], kernel_size=3, stride=1, padding=1)
        self.up2 = upshuffle(256, 128, self.upscale_factor[1], kernel_size=3, stride=1, padding=1)
        self.up3 = upshuffle(128, 64, self.upscale_factor[2], kernel_size=3, stride=1, padding=1)
        self.up4 = upshuffle(64, 64, self.upscale_factor[3], kernel_size=3, stride=1, padding=1)
        self.up5 = upshufflenorelu(64, 3, self.upscale_factor[4])


        assert self.input_length == self.sequence_length == self.output_length == 1

    def __init__(self, args):
        super(SceneClassModel, self).__init__(args)

        assert args.detach_level == 0

        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus
        self.feature_extractor = FeatureLearnerModule(args)
        self.embedding_input = nn.Linear(args.image_feature, args.hidden_size)

        self.input_feature_type = args.input_feature_type[0]

        self.pointwise_conv = combine_block_w_do(512, 64, args.dropout)

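        # scene classification head over the flattened 64 * 7 * 7 features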
        scene_unembed_size = torch.Tensor(
            [64 * 7 * 7, self.hidden_size, self.num_classes])
        self.scene_unembed = input_embedding_net(
            scene_unembed_size.long().tolist(), dropout=args.dropout)

        assert self.input_length == self.sequence_length == self.output_length == 1

    def __init__(self, args):
        super(BatchCPHeatmapModel, self).__init__(args)

        self.use_syn = args.use_syn
        self.ori_w, self.ori_h = 1920, 1080
        self.environment_layer = BatchCPGradientLayer
        self.loss_function = args.loss
        self.relu = nn.LeakyReLU()
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids

        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc

        self.feature_extractor.eval()

        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3

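        # the embedding blocks take 1024 input channels when synthetic inputs are used (presumably concatenated with the image features), 512 otherwise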
        plane_dim = 1024 if self.use_syn else 512
        self.image_embed = combine_block_w_do(plane_dim, 64,
                                              args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            plane_dim, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            EnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'we do not use a batch size larger than 1; instead we accumulate gradients over several steps.'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

    def __init__(self, args):
        super(ForcePredictor, self).__init__()
        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3
        self.sequence_length = args.sequence_length
        self.number_of_cp = args.number_of_cp
        self.use_gt_cp = args.use_gt_cp
        self.vis_grad = args.vis_grad
        self.train_res = args.train_res or self.vis_grad
        self.grad_value = None

        # force predictor networks.
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        if not self.train_res:
            self.feature_extractor.eval()
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            512, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            NoGradEnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'batch_size > 1 has not been implemented yet, because of the environment'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()
Example #14
    def __init__(self, args):
        super(JointNS, self).__init__(args)
        self.image_feature_size = 512
        self.object_feature_size = 512
        self.hidden_size = 512
        self.num_layers = 3

        self.loss_function = args.loss
        self.number_of_cp = args.number_of_cp
        self.environment = args.instance_environment
        self.sequence_length = args.sequence_length
        self.gpu_ids = args.gpu_ids
        self.all_obj_names = args.object_list
        self.use_gt_cp = args.use_gt_cp
        self.clean_force = True

        # configs w.r.t. two losses
        self.joint_two_losses = args.joint_two_losses
        self.loss1_or_loss2 = None
        if args.loss1_w < 0.00001:
            self.loss1_or_loss2 = False  # update loss2 only
        elif args.loss2_w < 0.00001:
            self.loss1_or_loss2 = True  # update loss1 only
        self.loss1_optim, self.loss2_optim, self.joint_optim = None, None, None

        # neural force simulator
        self.use_image_feature = True
        if not self.use_image_feature:
            self.one_ns_layer = MLPNS(hidden_size=64, layer_norm=False)
        else:
            self.one_ns_layer = NSWithImageFeature(hidden_size=64,
                                                   layer_norm=False,
                                                   image_feature_dim=512)
        # self.ns_layer = {obj_name: MLPNS(hidden_size=64, layer_norm=False) for obj_name in self.all_obj_names}

        # force predictor networks.
        self.feature_extractor = resnet18(pretrained=args.pretrain)
        del self.feature_extractor.fc
        self.feature_extractor.eval()
        self.input_feature_size = self.object_feature_size
        self.cp_feature_size = self.number_of_cp * 3
        self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
        self.contact_point_image_embed = combine_block_w_do(
            512, 64, args.dropout_ratio)

        input_object_embed_size = torch.Tensor(
            [3 + 4, 100, self.object_feature_size])
        self.input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)
        self.contact_point_input_object_embed = input_embedding_net(
            input_object_embed_size.long().tolist(),
            dropout=args.dropout_ratio)

        state_embed_size = torch.Tensor([
            NoGradEnvState.total_size + self.cp_feature_size, 100,
            self.object_feature_size
        ])
        self.state_embed = input_embedding_net(
            state_embed_size.long().tolist(), dropout=args.dropout_ratio)

        self.lstm_encoder = nn.LSTM(input_size=self.hidden_size + 64 * 7 * 7,
                                    hidden_size=self.hidden_size,
                                    batch_first=True,
                                    num_layers=self.num_layers)

        self.contact_point_encoder = nn.LSTM(input_size=self.hidden_size +
                                             64 * 7 * 7,
                                             hidden_size=self.hidden_size,
                                             batch_first=True,
                                             num_layers=self.num_layers)
        contact_point_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])
        self.contact_point_decoder = input_embedding_net(
            contact_point_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                        hidden_size=self.hidden_size)

        forces_directions_decoder_size = torch.Tensor(
            [self.hidden_size, 100, (3) * self.number_of_cp])

        self.forces_directions_decoder = input_embedding_net(
            forces_directions_decoder_size.long().tolist(),
            dropout=args.dropout_ratio)

        assert args.batch_size == 1, 'batch_size > 1 has not been implemented yet, because of the environment'

        assert self.number_of_cp == 5  # for five fingers
        self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
            args.data)
        if args.gpu_ids != -1:
            for obj, val in self.all_objects_keypoint_tensor.items():
                self.all_objects_keypoint_tensor[obj] = val.cuda()

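        # modules that make up the force-predictor branch (as opposed to self.one_ns_layer), e.g., for building the per-loss optimizers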
        self.force_predictor_modules = [
            self.feature_extractor, self.image_embed,
            self.contact_point_image_embed, self.input_object_embed,
            self.contact_point_input_object_embed, self.state_embed,
            self.lstm_encoder, self.contact_point_encoder,
            self.contact_point_decoder, self.forces_directions_decoder
        ]

        # see gradients for debugging
        self.vis_grad = args.vis_grad
        self.grad_vis = None

        self.train_res = args.train_res or self.vis_grad
Example #15
    def __init__(self, args):
        super(ComplexAEGazeImuModel, self).__init__(args)

        self.image_size = args.image_size
        self.imus = args.imus

        self.input_length = args.input_length
        self.output_length = args.output_length
        self.sequence_length = args.sequence_length
        self.num_classes = args.num_classes
        self.gpu_ids = args.gpu_ids

        self.base_lr = args.base_lr
        self.image_feature = args.image_feature
        self.hidden_size = args.hidden_size
        self.imu_embedding_size = 30

        self.loss_function = args.loss

        self.relu = nn.LeakyReLU()
        self.num_imus = args.num_imus
        self.feature_extractor = FeatureLearnerModule(args)

        self.pointwise_conv = combine_block_w_do(512, 64, dropout=0)  # important: dropout is intentionally disabled here
        self.imu_pointwise_conv = combine_block_w_do(512, 64, args.dropout)
        self.gaze_pointwise_conv = combine_block_w_do(512, 64, args.dropout)

        self.reconst_resolution = args.reconst_resolution
        assert self.reconst_resolution == 56
        self.feature_sizes = [7, 14, 28, 56, 56, self.reconst_resolution]
        self.upscale_factor = [
            int(self.feature_sizes[i + 1] / self.feature_sizes[i])
            for i in range(len(self.feature_sizes) - 1)
        ]

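        # decoder upsampling the 7 x 7 features to a 56 x 56, 3-channel reconstruction (the last two stages keep the resolution fixed)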
        self.up1 = upshuffle(64,
                             256,
                             self.upscale_factor[0],
                             kernel_size=3,
                             stride=1,
                             padding=1)
        self.up2 = upshuffle(256,
                             128,
                             self.upscale_factor[1],
                             kernel_size=3,
                             stride=1,
                             padding=1)
        self.up3 = upshuffle(128,
                             64,
                             self.upscale_factor[2],
                             kernel_size=3,
                             stride=1,
                             padding=1)
        self.up4 = upshuffle(64,
                             64,
                             self.upscale_factor[3],
                             kernel_size=3,
                             stride=1,
                             padding=1)
        self.up5 = upshufflenorelu(64, 3, self.upscale_factor[4])

        gaze_unembed_size = torch.Tensor([self.hidden_size, 100, 2])
        self.gaze_unembed = input_embedding_net(
            gaze_unembed_size.long().tolist(), dropout=args.dropout)

        imu_unembed_size = torch.Tensor(
            [self.hidden_size, 100, self.num_imus * 1])
        self.imu_unembed = input_embedding_net(
            imu_unembed_size.long().tolist(), dropout=args.dropout)

        self.imu_embed_lstm = nn.LSTM(64 * 7 * 7,
                                      self.hidden_size,
                                      batch_first=True,
                                      num_layers=3)
        self.gaze_embed_lstm = nn.LSTM(64 * 7 * 7,
                                       self.hidden_size,
                                       batch_first=True,
                                       num_layers=3)

        self.gaze_decoder_lstm = nn.LSTM(64 * 7 * 7,
                                         self.hidden_size,
                                         batch_first=True,
                                         num_layers=3)
        self.imu_decoder_lstm = nn.LSTM(64 * 7 * 7,
                                        self.hidden_size,
                                        batch_first=True,
                                        num_layers=3)

        assert self.input_length == self.sequence_length == self.output_length