Beispiel #1
0
    def __init__(self,
                 img_emb_size,
                 action_size,
                 args,
                 use_bb_in_input=True,
                 n_classes=0,
                 use_spatial_softmax=True):
        """Build the MLP trunk and output head of the bounding-box-only model.

        Args:
            img_emb_size: Not used by this constructor.
            action_size: Number of action features concatenated with the
                bounding-box features at the trunk input.
            args: Config object; `use_bb_in_input` and `loss_type` are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            n_classes: Output width of the classification head. Must be
                positive unless `args.loss_type == 'regr'`.
            use_spatial_softmax: Not used by this constructor.
        """
        super(BoundingBoxOnlyModel, self).__init__()

        # 6 values for the other object's pose (6D) plus 12 for the anchor
        # and other-object bounding boxes.
        if args.use_bb_in_input:
            bb_dim = 6 + 12
        else:
            bb_dim = 0
        # Bounding-box input is mandatory for this model.
        assert bb_dim > 0

        trunk_dims = [bb_dim + action_size, 512, 256, 64]
        self.bb_action_model = build_mlp(trunk_dims, activation='relu')

        if args.loss_type == 'regr':
            # Regression head: 6 outputs.
            self.delta_pose_model = build_mlp(
                [64, 6], activation='relu', final_nonlinearity=False)
            print("Using regression model.")
            return

        # Classification head: one output per class.
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp(
            [64, n_classes], activation='relu', final_nonlinearity=False)
Beispiel #2
0
    def __init__(self,
                 img_emb_size,
                 action_size,
                 args,
                 use_bb_in_input=True,
                 n_classes=0,
                 use_spatial_softmax=True):
        """Build the voxel encoder, optional spatial-softmax encoder and MLPs.

        Args:
            img_emb_size: Embedding size; selects the voxel encoder
                (`CML_3` when <= 36, otherwise `CML_2`) and sizes the MLPs.
            action_size: Number of action features at the trunk input.
            args: Config object; `add_xy_channels`, `use_bb_in_input` and
                `loss_type` are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            n_classes: Output width of the classification head. Must be
                positive unless `args.loss_type == 'regr'`.
            use_spatial_softmax: When true, a `Spatial3DSoftmaxImageEncoder`
                is created and 96 features are added to the trunk input.

        Raises:
            ValueError: If `args.add_xy_channels` is neither 0 nor 1.
        """
        super(UnscaledVoxelModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.action_size = action_size
        self.use_spatial_softmax = use_spatial_softmax
        self.args = args

        # Reject unsupported channel settings up front.
        if args.add_xy_channels not in (0, 1):
            raise ValueError("Invalid add_xy_channels")
        in_channels = 3 if args.add_xy_channels == 0 else 6

        encoder_cls = CML_3 if img_emb_size <= 36 else CML_2
        self.voxels_to_rel_emb_model = encoder_cls(in_channels)

        ss_dim = 0
        if use_spatial_softmax:
            ss_dim = 96
            self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
                in_channels, ss_dim)

        # 6 values for the other object's pose (6D) plus 12 for the anchor
        # and other-object bounding boxes.
        if args.use_bb_in_input:
            bb_dim = 6 + 12
        else:
            bb_dim = 0

        half = img_emb_size // 2
        quarter = img_emb_size // 4
        trunk_in = img_emb_size + bb_dim + action_size + ss_dim
        self.img_action_model = build_mlp([trunk_in, half, half, quarter],
                                          activation='relu')

        if args.loss_type == 'regr':
            # Regression head: 6 outputs.
            self.delta_pose_model = build_mlp([quarter, 64, 6],
                                              activation='relu',
                                              final_nonlinearity=False)
            print("Using regression model.")
            return

        # Classification head: one output per class.
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp([quarter, 64, n_classes],
                                          activation='relu',
                                          final_nonlinearity=False)
Beispiel #3
0
    def __init__(self,
                 resnet_klass,
                 img_emb_size,
                 action_size,
                 args,
                 use_bb_in_input=True,
                 n_classes=0,
                 use_spatial_softmax=True):
        """Build the ResNet voxel encoder and the MLPs that follow it.

        Args:
            resnet_klass: Callable invoked as
                `resnet_klass(emb_size=..., input_channels=...)` to create
                the encoder.
            img_emb_size: Embedding size passed to the encoder; also sizes
                the MLPs.
            action_size: Number of action features at the trunk input.
            args: Config object; `add_xy_channels`, `use_bb_in_input` and
                `loss_type` are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            n_classes: Output width of the classification head. Must be
                positive unless `args.loss_type == 'regr'`.
            use_spatial_softmax: Stored on the instance only; no
                spatial-softmax encoder is created here.

        Raises:
            ValueError: If `args.add_xy_channels` is neither 0 nor 1.
        """
        super(UnscaledResNetVoxelModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.action_size = action_size
        self.use_spatial_softmax = use_spatial_softmax
        self.args = args

        # Reject unsupported channel settings up front.
        if args.add_xy_channels not in (0, 1):
            raise ValueError("Invalid add_xy_channels")
        in_channels = 3 if args.add_xy_channels == 0 else 6

        self.resnet = resnet_klass(emb_size=img_emb_size,
                                   input_channels=in_channels)

        # No spatial-softmax features are appended in this variant.
        ss_dim = 0
        if args.use_bb_in_input:
            bb_dim = 6 + 12
        else:
            bb_dim = 0

        quarter = img_emb_size // 4
        trunk_in = img_emb_size + bb_dim + action_size + ss_dim
        self.img_action_model = build_mlp(
            [trunk_in, img_emb_size // 2, quarter],
            activation='relu',
            final_nonlinearity=True,
        )

        if args.loss_type == 'regr':
            # Regression head: 6 outputs.
            self.delta_pose_model = build_mlp([quarter, 6],
                                              activation='relu',
                                              final_nonlinearity=False)
            print("Using regression model.")
            return

        # Classification head: one output per class.
        assert n_classes > 0, "Invalid number of classes."
        print("Using classif model with {} classes".format(n_classes))
        self.delta_pose_model = build_mlp([quarter, n_classes],
                                          activation='relu',
                                          final_nonlinearity=False)
Beispiel #4
0
    def __init__(self,
                 img_emb_size,
                 args,
                 use_bb_in_input=True,
                 n_classes=0,
                 use_spatial_softmax=True):
        """Build the voxel encoder and the single-output prediction MLP.

        Args:
            img_emb_size: Embedding size; contributes to the MLP input width.
            args: Config object; `add_xy_channels` and `use_bb_in_input`
                are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            n_classes: Not used by this constructor.
            use_spatial_softmax: When true, a `Spatial3DSoftmaxImageEncoder`
                is created and 96 features are added to the MLP input.

        Raises:
            ValueError: If `args.add_xy_channels` is neither 0 nor 1.
        """
        super(UnscaledPrecondVoxelModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.use_spatial_softmax = use_spatial_softmax
        self.args = args

        # Reject unsupported channel settings up front.
        if args.add_xy_channels not in (0, 1):
            raise ValueError("Invalid add_xy_channels")
        in_channels = 3 if args.add_xy_channels == 0 else 6

        self.voxels_to_rel_emb_model = CML_2(in_channels)

        ss_dim = 0
        if use_spatial_softmax:
            ss_dim = 96
            self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
                in_channels, ss_dim)

        # 12 values for the anchor and other-object bounding boxes; unlike
        # the sibling models, no 6D pose term is added here.
        if args.use_bb_in_input:
            bb_dim = 12
        else:
            bb_dim = 0

        predictor_dims = [img_emb_size + bb_dim + ss_dim, 256, 64, 1]
        self.img_emb_pred_model = build_mlp(predictor_dims,
                                            activation='relu',
                                            final_nonlinearity=False)
Beispiel #5
0
    def __init__(self, img_emb_size, action_size, args, use_bb_in_input=True):
        """Build the image encoders and the forward/inverse MLPs.

        Args:
            img_emb_size: Embedding size passed to `ConvImageEncoder`; also
                sizes the MLPs.
            action_size: Number of action features at the trunk input.
            args: Config object; `add_xy_channels` and `use_bb_in_input`
                are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.

        Raises:
            ValueError: If `args.add_xy_channels` is not 0, 1 or 2.
        """
        super(SimpleModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.action_size = action_size
        self.args = args

        if args.add_xy_channels == 0:
            input_nc = 3
        elif args.add_xy_channels in (1, 2):
            input_nc = 5
        else:
            # Without this branch `input_nc` stayed unbound and the
            # constructor failed below with an UnboundLocalError; fail
            # early with the same error the sibling models raise.
            raise ValueError("Invalid add_xy_channels")

        self.img_to_rel_emb_model = ConvImageEncoder(input_nc,
                                                     img_emb_size,
                                                     inp_size=256)
        z_ss_size = 128
        self.spatial_softmax_encoder = SpatialSoftmaxImageEncoder(
            input_nc, z_ss_size)

        # Alternative encoder, currently disabled:
        # self.img_to_rel_emb_model = ResnetGenerator_Encoder(
        #     input_nc=input_nc,
        #     output_nc=3,
        #     n_blocks=1,
        #     ngf=2,
        #     n_downsampling=8,
        #     final_channels=img_emb_size//2)

        # 8 bounding-box features are appended when enabled in `args`.
        bb_size = 8 if args.use_bb_in_input else 0
        self.img_action_model = build_mlp(
            [
                img_emb_size + bb_size + action_size + z_ss_size, 256, 256,
                img_emb_size
            ],
            activation='relu',
            final_nonlinearity=True,
        )

        # Head with 4 outputs on top of the trunk embedding.
        self.delta_pose_model = build_mlp(
            [img_emb_size, 128, 4],
            activation='relu',
            final_nonlinearity=False,
        )

        # Inverse model: input is the embedding plus the 4 head outputs,
        # output width is `img_emb_size + z_ss_size`.
        self.inv_model = build_mlp(
            [img_emb_size + 4, 256, 256, img_emb_size + z_ss_size])
Beispiel #6
0
    def __init__(self,
                 img_emb_size,
                 action_size,
                 args,
                 use_bb_in_input=True,
                 n_classes=0,
                 use_spatial_softmax=True):
        """Build the small-embedding voxel encoder and the output MLP.

        Args:
            img_emb_size: Embedding size passed to `CML_4`; contributes to
                the MLP input width.
            action_size: Number of action features at the MLP input.
            args: Config object; `add_xy_channels`, `use_bb_in_input` and
                `loss_type` are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            n_classes: Output width of the classification head. Must be
                positive unless `args.loss_type == 'regr'`.
            use_spatial_softmax: When true, a `Spatial3DSoftmaxImageEncoder`
                is created and 96 features are added to the MLP input.

        Raises:
            ValueError: If `args.add_xy_channels` is neither 0 nor 1.
        """
        super(SmallEmbUnscaledVoxelModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.action_size = action_size
        self.use_spatial_softmax = use_spatial_softmax
        self.args = args

        if args.add_xy_channels == 0:
            input_nc = 3
        elif args.add_xy_channels == 1:
            input_nc = 6
        else:
            raise ValueError("Invalid add_xy_channels")

        # An earlier variant passed `img_emb_size + 3` here; the reason for
        # the extra +3 is not recorded, so the plain size is used.
        self.voxels_to_rel_emb_model = CML_4(input_nc,
                                             img_emb_size,
                                             out_emb=None)

        if use_spatial_softmax:
            z_ss_size = 96
            self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
                input_nc, z_ss_size)
        else:
            z_ss_size = 0

        # 6 values for the other object's pose (6D) plus 12 for the anchor
        # and other-object bounding boxes.
        bb_size = 6 + 12 if args.use_bb_in_input else 0
        delta_pose_model_inp_size = (img_emb_size + bb_size + action_size +
                                     z_ss_size)

        use_regression = (args.loss_type == 'regr')
        delta_pose_model_out_size = 6 if use_regression else n_classes

        # Input and first hidden layer are always present; the quarter-width
        # hidden layer is added only when it would have at least 32 units.
        delta_pose_model_layers = [
            delta_pose_model_inp_size, delta_pose_model_inp_size // 2
        ]
        if (delta_pose_model_inp_size // 4) >= 32:
            delta_pose_model_layers.append(delta_pose_model_inp_size // 4)
        delta_pose_model_layers.append(delta_pose_model_out_size)

        if use_regression:
            self.delta_pose_model = build_mlp(
                delta_pose_model_layers,
                activation='relu',
                final_nonlinearity=False,
            )
            print("Using regression model.")
        else:
            assert n_classes > 0, "Invalid number of classes."
            print("Using classif model with {} classes".format(n_classes))
            self.delta_pose_model = build_mlp(delta_pose_model_layers,
                                              activation='relu',
                                              final_nonlinearity=False)
Beispiel #7
0
    def __init__(self,
                 img_emb_size,
                 action_size,
                 args,
                 use_bb_in_input=True,
                 use_spatial_softmax=True):
        """Build the voxel encoder, trunk MLP and position/orientation heads.

        Position and orientation each get either one 3-output regression
        head or three per-axis classification heads, chosen by
        `args.pos_loss_type` and `args.orient_loss_type`.

        Args:
            img_emb_size: Embedding size; sizes the trunk and head MLPs.
            action_size: Number of action features at the trunk input.
            args: Config object; `add_xy_channels`, `use_bb_in_input`,
                `pos_loss_type`, `orient_loss_type` and, for classification,
                `pos_classif_num_classes` / `orient_classif_num_classes`
                are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            use_spatial_softmax: When true, a `Spatial3DSoftmaxImageEncoder`
                is created and 96 features are added to the trunk input.

        Raises:
            ValueError: If `args.add_xy_channels` is neither 0 nor 1.
        """
        super(UnscaledClassifVoxelModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.action_size = action_size
        self.use_spatial_softmax = use_spatial_softmax
        self.args = args

        # Reject unsupported channel settings up front.
        if args.add_xy_channels not in (0, 1):
            raise ValueError("Invalid add_xy_channels")
        in_channels = 3 if args.add_xy_channels == 0 else 6

        self.voxels_to_rel_emb_model = CML_2(in_channels)

        ss_dim = 0
        if use_spatial_softmax:
            ss_dim = 96
            self.spatial_softmax_encoder = Spatial3DSoftmaxImageEncoder(
                in_channels, ss_dim)

        # 6 values for the other object's pose (6D) plus 12 for the anchor
        # and other-object bounding boxes.
        if args.use_bb_in_input:
            bb_dim = 6 + 12
        else:
            bb_dim = 0

        half = img_emb_size // 2
        quarter = img_emb_size // 4
        trunk_in = img_emb_size + bb_dim + action_size + ss_dim
        self.img_action_model = build_mlp([trunk_in, half, half, quarter],
                                          activation='relu')

        def make_head(out_dim):
            # All heads share one shape; a fresh size list is built per call.
            return build_mlp([img_emb_size // 4, 64, out_dim],
                             activation='relu',
                             final_nonlinearity=False)

        if args.pos_loss_type == 'regr':
            # The position regression head is stored as `delta_pose_model`.
            self.delta_pose_model = make_head(3)
            print("Using regression model.")
        else:
            pos_classes = args.pos_classif_num_classes
            assert pos_classes > 0
            self.delta_pos_x = make_head(pos_classes)
            self.delta_pos_y = make_head(pos_classes)
            self.delta_pos_z = make_head(pos_classes)

        if args.orient_loss_type == 'regr':
            self.delta_orient_model = make_head(3)
            print("Using regression model.")
        else:
            orient_classes = args.orient_classif_num_classes
            assert orient_classes > 0
            self.delta_orient_x = make_head(orient_classes)
            self.delta_orient_y = make_head(orient_classes)
            self.delta_orient_z = make_head(orient_classes)
Beispiel #8
0
    def __init__(self,
                 resnet_klass,
                 img_emb_size,
                 action_size,
                 args,
                 use_bb_in_input=True,
                 use_spatial_softmax=True):
        """Build the ResNet encoder, trunk MLP and position/orientation heads.

        Position and orientation each get either one 3-output regression
        head or three per-axis classification heads, chosen by
        `args.pos_loss_type` and `args.orient_loss_type`.

        Args:
            resnet_klass: Callable invoked as
                `resnet_klass(emb_size=..., input_channels=...)` to create
                the encoder.
            img_emb_size: Embedding size passed to the encoder; also sizes
                the trunk and head MLPs.
            action_size: Number of action features at the trunk input.
            args: Config object; `add_xy_channels`, `use_bb_in_input`,
                `pos_loss_type`, `orient_loss_type` and, for classification,
                `pos_classif_num_classes` / `orient_classif_num_classes`
                are read.
            use_bb_in_input: Not used; `args.use_bb_in_input` is consulted
                instead.
            use_spatial_softmax: Stored on the instance only; no
                spatial-softmax encoder is created here.

        Raises:
            ValueError: If `args.add_xy_channels` is neither 0 nor 1.
        """
        super(UnscaledClassifResNetVoxelModel, self).__init__()
        self.img_emb_size = img_emb_size
        self.action_size = action_size
        self.use_spatial_softmax = use_spatial_softmax
        self.args = args

        # Reject unsupported channel settings up front.
        if args.add_xy_channels not in (0, 1):
            raise ValueError("Invalid add_xy_channels")
        in_channels = 3 if args.add_xy_channels == 0 else 6

        self.resnet = resnet_klass(emb_size=img_emb_size,
                                   input_channels=in_channels)

        # No spatial-softmax features are appended in this variant.
        ss_dim = 0
        if args.use_bb_in_input:
            bb_dim = 6 + 12
        else:
            bb_dim = 0

        trunk_in = img_emb_size + bb_dim + action_size + ss_dim
        self.img_action_model = build_mlp(
            [trunk_in, img_emb_size // 2, img_emb_size // 4],
            activation='relu',
            final_nonlinearity=True,
        )

        def make_head(out_dim):
            # All heads share one shape; a fresh size list is built per call.
            return build_mlp([img_emb_size // 4, 64, out_dim],
                             activation='relu',
                             final_nonlinearity=False)

        if args.pos_loss_type == 'regr':
            # The position regression head is stored as `delta_pose_model`.
            self.delta_pose_model = make_head(3)
            print("Using regression model.")
        else:
            pos_classes = args.pos_classif_num_classes
            assert pos_classes > 0
            self.delta_pos_x = make_head(pos_classes)
            self.delta_pos_y = make_head(pos_classes)
            self.delta_pos_z = make_head(pos_classes)

        if args.orient_loss_type == 'regr':
            self.delta_orient_model = make_head(3)
            print("Using regression model.")
        else:
            orient_classes = args.orient_classif_num_classes
            assert orient_classes > 0
            self.delta_orient_x = make_head(orient_classes)
            self.delta_orient_y = make_head(orient_classes)
            self.delta_orient_z = make_head(orient_classes)