Ejemplo n.º 1
0
    def __init__(
        self,
        cfg_file=None,
        model_zoo='./data/torch_zoo/',
        exp_name='test',
        exp_idx=0,
        exp_dir='./data/exps/default/',
        gpu_idx=0,
        resume=True,
        seed=0,
        resume_epoch=-1,
        store_checkpoints=True,
        store_checkpoints_purge=3,
        batch_size=256,
        num_workers=8,
        visdom_env='',
        visdom_server='http://localhost',
        visdom_port=8097,
        metric_print_interval=5,
        visualize_interval=0,
        SOLVER=get_default_args(init_optimizer),
        DATASET=get_default_args(dataset_zoo),
        MODEL=get_default_args(C3DPO),
    ):
        """Build the experiment configuration and store it in ``self.cfg``.

        The configuration starts from the defaults of ``ExperimentConfig``.
        When ``cfg_file`` is given, ``set_config_from_file`` is applied to
        those defaults; otherwise ``auto_init_args`` is called with
        ``tgt='cfg'`` (presumably copying the constructor arguments into
        ``self.cfg`` -- the helper is defined elsewhere, confirm there).
        Finally the result is wrapped with ``nested_attr_dict``.

        Args:
            cfg_file: Optional path/handle passed to ``set_config_from_file``.
            SOLVER, DATASET, MODEL: Nested option groups whose defaults are
                computed once, at definition time, via ``get_default_args``.
            (all other arguments): Flat experiment options; they are not
                read directly in this method.
        """
        # Always begin from the class-level defaults.
        self.cfg = get_default_args(ExperimentConfig)

        if cfg_file is None:
            # No file supplied: take the values from the call arguments.
            auto_init_args(self, tgt='cfg', can_overwrite=True)
        else:
            set_config_from_file(self.cfg, cfg_file)

        self.cfg = nested_attr_dict(self.cfg)
Ejemplo n.º 2
0
 def __init__(
         self,
         eval_only=False,
         exp_dir='./relate/',
         path_to_last='',
         gpu_idx=0,
         resume=True,
         clear_stats=False,
         clear_optimizer=False,
         seed=0,
         resume_epoch=-1,
         store_checkpoints=True,
         store_checkpoints_purge=1,
         batch_size=100,
         num_workers=4,
         visdom_env='',
         visdom_server='http://localhost',
         visdom_port=8097,
         metric_print_interval=30,
         visualize_interval=0,
         default_opts={'model': 'relate_static', 'dataset': 'clevr5',
                       'optimizer': 'adam'},
         SOLVER={},
         DATASET={},
         MODEL={},
 ):
     """Collect the experiment options.

     The body only calls ``auto_init_args(self)``; that helper is defined
     elsewhere and presumably stores every constructor argument as an
     attribute of the same name (confirm against its implementation).

     NOTE(review): the dict defaults are shared between calls; they are
     kept as-is because the signature defaults may be read by external
     config tooling.
     """
     auto_init_args(self)
Ejemplo n.º 3
0
 def __init__(self, db_size=30000, db_dim=3, perc_replace=0.01):
     """Create a zero-initialised, non-trainable database tensor.

     Args:
         db_size: Number of columns of the database tensor.
         db_dim: Number of rows of the database tensor.
         perc_replace: Not read here; presumably stored on the instance by
             ``auto_init_args`` for later use.
     """
     super().__init__()
     auto_init_args(self)

     # (db_dim, db_size) float tensor wrapped as a Parameter, then frozen so
     # that it receives no gradient.
     self.db = Parameter(torch.zeros(db_dim, db_size).float())
     self.db.requires_grad = False

     self.pointer = 0
     self.uniform_sphere_sampling = False
Ejemplo n.º 4
0
    def __init__(self, df_dim=64, pos_dim=90, upsample=False):
        """Build the discriminator layers.

        Args:
            df_dim: Base channel width; deeper layers use 2x, 4x, 8x and 16x
                this value.
            pos_dim: Output size of the ``enc`` linear layer (read back as
                ``self.pos_dim``).
            upsample: When truthy (read back as ``self.upsample``), an extra
                stride-2 convolution stage (``disc_conv6``/``inorm5``) is
                created.

        ``self.pos_dim`` / ``self.upsample`` are expected to be set by
        ``auto_init_args`` (defined elsewhere).
        """
        super(GAN_disc, self).__init__()
        auto_init_args(self)

        def sn_conv(c_in, c_out, kernel=5):
            # Stride-2 convolution wrapped in spectral normalisation.
            conv = nn.Conv2d(c_in, c_out, kernel, stride=2,
                             padding=kernel // 2)
            return spectral_norm(conv)

        def norm_act(channels):
            # Spectral-normed affine instance norm followed by LeakyReLU(0.2).
            return nn.Sequential(
                spectral_norm(nn.InstanceNorm2d(channels, affine=True)),
                nn.LeakyReLU(negative_slope=0.2))

        # Channel widths of the successive stages.
        w1 = int(df_dim)
        w2 = int(df_dim * 2)
        w4 = int(df_dim * 4)
        w8 = int(df_dim * 8)
        w16 = int(df_dim * 16)

        # The first convolution is the only one without spectral norm.
        self.disc_conv1 = nn.Conv2d(3, w1, 5, stride=2, padding=5 // 2)
        self.lrelu1 = nn.LeakyReLU(negative_slope=0.2)

        self.disc_conv2 = sn_conv(w1, w2)
        self.inorm1 = norm_act(w2)

        self.disc_conv3 = sn_conv(w2, w4)
        self.inorm2 = norm_act(w4)

        self.disc_conv4 = sn_conv(w4, w8)
        self.inorm3 = norm_act(w8)

        self.disc_conv5 = sn_conv(w8, w16)
        self.inorm4 = norm_act(w16)

        if self.upsample:
            self.disc_conv6 = sn_conv(w16, w16)
            self.inorm5 = norm_act(w16)

        # Linear regressors with a single output each.
        self.class1 = nn.Linear(w4, 1)
        self.class2 = nn.Linear(w8, 1)
        self.class3 = nn.Linear(w16, 1)
        self.class4 = nn.Linear(int(df_dim * 32), 1)

        # Heads operating on a flattened feature of size df_dim * 16 * 16.
        flat_dim = int(df_dim * 16 * 16)
        self.dh4 = spectral_norm(nn.Linear(flat_dim, 1))
        self.enc = spectral_norm(nn.Linear(flat_dim, self.pos_dim))
Ejemplo n.º 5
0
    def __init__(self, latent_dims=[90, 30], gf_dim=64, c_dim=3):
        """Build the generator: background/object generators and a decoder.

        Args:
            latent_dims: Two-element sequence; ``latent_dims[0]`` is passed
                to the background generator (and as the second argument of
                the object generator), ``latent_dims[1]`` to the object
                generator.
            gf_dim: Base channel width of the decoder (read back as
                ``self.gf_dim``).
            c_dim: Number of output channels of the last layer (read back as
                ``self.c_dim``).

        ``self.gf_dim`` / ``self.c_dim`` are expected to be set by
        ``auto_init_args`` (defined elsewhere).
        """
        super(GAN_gen, self).__init__()
        auto_init_args(self)

        self.bg_generator = AdaIngen_bg(latent_dims[0],
                                        gf_dim,
                                        f_dim=gf_dim * 2,
                                        lrelu=True)
        self.obj_generator = AdaIngen_obj(latent_dims[1],
                                          latent_dims[0],
                                          gf_dim,
                                          f_dim=gf_dim * 2,
                                          lrelu=True)

        # Decoder: 4*gf -> 2*gf -> gf -> gf -> gf -> c_dim channels.
        self.deconv1 = nn.Sequential(
            nn.ConvTranspose2d(4 * self.gf_dim,
                               2 * self.gf_dim,
                               4,
                               stride=2,
                               padding=1), nn.LeakyReLU(negative_slope=0.2))
        self.deconv2 = nn.Sequential(
            nn.ConvTranspose2d(2 * self.gf_dim,
                               self.gf_dim,
                               4,
                               stride=2,
                               padding=1), nn.LeakyReLU(negative_slope=0.2))
        self.deconv3 = nn.Sequential(
            nn.ConvTranspose2d(self.gf_dim,
                               self.gf_dim,
                               3,
                               stride=1,
                               padding=1), nn.LeakyReLU(negative_slope=0.2))
        self.deconv4 = nn.Sequential(
            nn.ConvTranspose2d(self.gf_dim,
                               self.gf_dim,
                               4,
                               stride=2,
                               padding=1), nn.LeakyReLU(negative_slope=0.2))
        # Final projection to image channels; no activation is attached here.
        self.deconv5 = nn.Sequential(
            nn.ConvTranspose2d(self.gf_dim, self.c_dim, 3, stride=1,
                               padding=1))

        # Chain deconv1..deconv5 into a single module.
        self.upsample_net = nn.Sequential(
            *[getattr(self, 'deconv%u' % i) for i in range(1, 6)])
Ejemplo n.º 6
0
    def __init__(
            self,
            jsonfile=None,
            train=True,
            limit_to=0,
            rand_sample=0,
            image_root=None,
            refresh_db=False,
    ):
        """Load the dataset database and, if available, its class index.

        The arguments are not read directly here; ``auto_init_args``
        (defined elsewhere) presumably stores them on the instance before
        ``load_db_file`` runs.

        ``self.class_db`` is set to ``self.get_class_db()`` when the first
        database entry contains the key ``'class_mask'``, otherwise to
        ``None``.
        """
        auto_init_args(self)
        self.load_db_file()

        # Only the first entry is inspected to decide whether class
        # information is present.
        self.class_db = (
            self.get_class_db() if 'class_mask' in self.db[0] else None)
Ejemplo n.º 7
0
    def __init__(
            self,
            jsonfile=None,
            train=True,
            limit_to=0,
            limit_seq_to=-1,
            rand_sample=0,
            image_root=None,
            mask_root=None,
            depth_root=None,
            refresh_db=False,
            min_visible=0,
            subsample=1,
            load_images=True,
            load_depths=True,
            load_masks=True,
            image_height=9 * 20 * 2,
            image_width=9 * 20 * 2,
            dilate_masks=5,
            max_frame_diff=-1.,
            max_angle_diff=4.,
            kp_conf_thr=0.,
            nrsfm_model_outputs=None,
            box_crop_context=1.,
            box_crop=False,
    ):
        """Load the dataset database, its class index and transposed view.

        The arguments are not read directly here; ``auto_init_args``
        (defined elsewhere) presumably stores them on the instance before
        ``load_db_file`` runs.

        ``self.class_db`` is set to ``self.get_class_db()`` when the first
        database entry contains the key ``'class_mask'``, otherwise to
        ``None``. ``get_transposed_db`` is called last.
        """
        auto_init_args(self)
        self.load_db_file()

        # Only the first entry is inspected to decide whether class
        # information is present.
        self.class_db = (
            self.get_class_db() if 'class_mask' in self.db[0] else None)

        self.get_transposed_db()
Ejemplo n.º 8
0
    def __init__(
            self,
            n_keypoints=17,
            shape_basis_size=10,
            mult_shape_by_class_mask=False,
            squared_reprojection_loss=False,
            n_fully_connected=1024,
            n_layers=6,
            keypoint_rescale=float(1),
            keypoint_norm_type='to_mean',
            projection_type='orthographic',
            z_augment=True,
            z_augment_rot_angle=float(np.pi),
            z_equivariance=False,
            # < 0 means same as z_augment_rot_angle
            z_equivariance_rot_angle=float(np.pi)/4,
            compose_z_equivariant_rot=True,  # TODO: remove this soon!
            camera_translation=False,
            camera_xy_translation=True,
            argmin_translation=False,
            argmin_translation_test=False,
            argmin_translation_min_depth=3.,
            argmin_to_augmented=False,
            camera_scale=False,
            argmin_scale=False,
            argmin_scale_test=False,
            loss_normalization='kp_total_count',
            independent_phi_for_aug=False,
            shape_pred_wd=1.,
            connectivity_setup='NONE',
            custom_param_groups=False,
            use_huber=False,
            huber_scaling=0.1,
            alpha_bias=True,
            canonicalization={
                'use': False,
                'n_layers': 6,
                'n_rand_samples': 4,
                'rot_angle': float(np.pi),
                'n_fully_connected': 1024,
            },
            linear_instead_of_conv=False,
            perspective_depth_threshold=0.1,
            depth_offset=0.,
            replace_keypoints_with_input=False,
            root_joint=0,
            loss_weights={'l_reprojection': 1.,
                          'l_canonicalization': 1.},
            log_vars=[
                'objective',
                'dist_reprojection',
                'l_reprojection',
                'l_canonicalization'],
            **kwargs):
        """Build the C3DPO networks.

        Creates the factorization trunk ``phi`` and its prediction heads
        (shape coefficients, shape basis, rotation, and optionally camera
        translation / scale), plus the canonicalization trunk ``psi`` with
        its coefficient head when ``canonicalization['use']`` is set.

        Args:
            n_keypoints: Number of keypoints; trunk input size is
                ``3 * n_keypoints`` and the shape layer outputs the same.
            shape_basis_size: Number of shape coefficients.
            n_fully_connected: Width passed to ``make_trunk`` for ``phi`` and
                input width of the heads attached to it.
            n_layers: Number of layers passed to ``make_trunk`` for ``phi``.
            alpha_bias: Forwarded as ``cnv_args['bias']`` of ``alpha_layer``.
            camera_translation: Create ``translation_layer`` when truthy.
            camera_scale: Create ``scale_layer`` when truthy.
            canonicalization: Options of the ``psi`` network
                (``use``, ``n_layers``, ``n_fully_connected`` are read here).
            linear_instead_of_conv: Use ``linear_layer`` instead of
                ``conv1x1`` for the heads built through ``layer_init_fn``.
            **kwargs: Accepted and ignored by this method.
            (all other arguments): Not read here; presumably stored on the
                instance by ``auto_init_args`` (defined elsewhere).
        """
        super(C3DPO, self).__init__()

        # autoassign constructor params to self
        auto_init_args(self)

        # factorization net
        self.phi = nn.Sequential(
            *make_trunk(dim_in=self.n_keypoints * 3,  # 2 dim loc, 1 dim visibility
                        n_fully_connected=self.n_fully_connected,
                        n_layers=self.n_layers))

        layer_init_fn = linear_layer if linear_instead_of_conv else conv1x1

        # shape coefficient predictor
        self.alpha_layer = layer_init_fn(self.n_fully_connected,
                                         self.shape_basis_size,
                                         init='normal0.01',
                                         cnv_args={
                                             'bias': self.alpha_bias,
                                             'kernel_size': 1
                                         })

        # 3D shape predictor
        self.shape_layer = layer_init_fn(self.shape_basis_size,
                                         3 * n_keypoints,
                                         init='normal0.01')

        # rotation predictor (predicts log-rotation)
        self.rot_layer = layer_init_fn(self.n_fully_connected,
                                       3,
                                       init='normal0.01')
        if self.camera_translation:
            # camera translation
            self.translation_layer = layer_init_fn(self.n_fully_connected,
                                                   3,
                                                   init='normal0.01')
        if self.camera_scale:
            # camera scale (non-negative predictions)
            self.scale_layer = nn.Sequential(
                layer_init_fn(self.n_fully_connected, 1, init='normal0.01'),
                nn.Softplus())

        if self.canonicalization['use']:
            # canonicalization net:
            psi_width = self.canonicalization['n_fully_connected']
            self.psi = nn.Sequential(
                *make_trunk(dim_in=self.n_keypoints * 3,
                            n_fully_connected=psi_width,
                            n_layers=self.canonicalization['n_layers']))
            # The coefficient head presumably consumes the psi features, so
            # its input width must follow the psi trunk width rather than the
            # phi width (the two only coincide with the default settings).
            self.alpha_layer_psi = conv1x1(psi_width,
                                           self.shape_basis_size,
                                           init='normal0.01')
Ejemplo n.º 9
0
    def __init__(
            self,
            n_keypoints=17,
            shape_basis_size=10,
            n_fully_connected=1024,
            n_layers=6,
            keypoint_rescale=float(1),
            keypoint_norm_type='to_mean',
            projection_type='orthographic',
            z_augment=True,
            z_augment_rot_angle=float(np.pi) / 8,
            z_equivariance=True,
            z_equivariance_rot_angle=float(np.pi) / 8,
            camera_translation=False,
            camera_xy_translation=False,
            argmin_translation=False,
            camera_scale=False,
            connectivity_setup='NONE',
            huber_scaling=0.01,
            reprojection_normalization='kp_total_count',
            independent_phi_for_aug=False,
            canonicalization={
                'use': True,
                'n_layers': 6,
                'n_rand_samples': 4,
                'rot_angle': float(np.pi),
                'n_fully_connected': 1024,
            },
            perspective_depth_threshold=0.1,
            depth_offset=0.,
            replace_keypoints_with_input=True,
            root_joint=0,
            weight_init_std=0.01,
            loss_weights={
                'l_reprojection': 1.,
                'l_canonicalization': 1.
            },
            log_vars=[
                'objective', 'dist_reprojection', 'l_reprojection',
                'l_canonicalization'
            ],
            **kwargs):
        """Build the C3DPO networks.

        Creates the factorization trunk ``phi`` and its 1x1-conv heads
        (shape coefficients, shape basis, rotation, and optionally camera
        translation / scale), plus the canonicalization trunk ``psi`` with
        its coefficient head when ``canonicalization['use']`` is set.

        Args:
            n_keypoints: Number of keypoints; trunk input size is
                ``3 * n_keypoints`` and the shape layer outputs the same.
            shape_basis_size: Number of shape coefficients.
            n_fully_connected: Width passed to ``make_trunk`` for ``phi`` and
                input width of the heads attached to it.
            n_layers: Number of layers passed to ``make_trunk`` for ``phi``.
            camera_translation: Create ``translation_layer`` when truthy.
            camera_scale: Create ``scale_layer`` when truthy.
            canonicalization: Options of the ``psi`` network
                (``use``, ``n_layers``, ``n_fully_connected`` are read here).
            weight_init_std: Passed as ``std`` to every ``conv1x1`` head.
            **kwargs: Accepted and ignored by this method.
            (all other arguments): Not read here; presumably stored on the
                instance by ``auto_init_args`` (defined elsewhere).
        """
        super(C3DPO, self).__init__()

        # autoassign constructor params to self
        auto_init_args(self)

        # factorization net
        self.phi = nn.Sequential(*self.make_trunk(
            dim_in=self.n_keypoints * 3,
            # 2 dim loc, 1 dim visibility
            n_fully_connected=self.n_fully_connected,
            n_layers=self.n_layers))

        # shape coefficient predictor
        self.alpha_layer = conv1x1(self.n_fully_connected,
                                   self.shape_basis_size,
                                   std=weight_init_std)

        # 3D shape predictor
        self.shape_layer = conv1x1(self.shape_basis_size,
                                   3 * n_keypoints,
                                   std=weight_init_std)

        # rotation predictor (predicts log-rotation)
        self.rot_layer = conv1x1(self.n_fully_connected,
                                 3,
                                 std=weight_init_std)
        if self.camera_translation:
            # camera translation
            self.translation_layer = conv1x1(self.n_fully_connected,
                                             3,
                                             std=weight_init_std)
        if self.camera_scale:
            # camera scale (with final softplus to ensure positive outputs)
            self.scale_layer = nn.Sequential(
                conv1x1(self.n_fully_connected, 3, std=weight_init_std),
                nn.Softplus())

        if self.canonicalization['use']:
            # canonicalization net:
            psi_width = self.canonicalization['n_fully_connected']
            self.psi = nn.Sequential(*self.make_trunk(
                dim_in=self.n_keypoints * 3,
                n_fully_connected=psi_width,
                n_layers=self.canonicalization['n_layers']))
            # The coefficient head presumably consumes the psi features, so
            # its input width must follow the psi trunk width rather than the
            # phi width (the two only coincide with the default settings).
            self.alpha_layer_psi = conv1x1(psi_width,
                                           self.shape_basis_size,
                                           std=weight_init_std)
Ejemplo n.º 10
0
    def __init__(self,
                 latent_dims=[90, 30],
                 gf_dim=64,
                 c_dim=3,
                 upsample=False):
        """Build the generator, optionally with an extra decoding stage.

        Args:
            latent_dims: Two-element sequence; ``latent_dims[0]`` goes to the
                background generator (and is the second argument of the
                object generator), ``latent_dims[1]`` to the object
                generator.
            gf_dim: Base channel width of the decoder (read back as
                ``self.gf_dim``).
            c_dim: Number of channels produced by the last layer (read back
                as ``self.c_dim``).
            upsample: When truthy, ``deconv1``/``deconv2`` use kernel 5,
                stride 1, padding 2 (instead of 4/2/1), ``deconv3`` uses
                kernel 4, stride 2 (instead of 3/1), and a sixth stage
                ``deconv6`` is appended.

        ``self.gf_dim`` / ``self.c_dim`` are expected to be set by
        ``auto_init_args`` (defined elsewhere).
        """
        super(GAN_gen, self).__init__()
        auto_init_args(self)

        # 0 or 1 for a boolean flag; shifts kernel/stride/padding below.
        up = int(upsample)

        # Objects and Background tensor generators
        self.bg_generator = AdaIngen_bg(
            latent_dims[0], gf_dim, f_dim=gf_dim * 2, lrelu=True)
        self.obj_generator = AdaIngen_obj(
            latent_dims[1], latent_dims[0], gf_dim,
            f_dim=gf_dim * 2, lrelu=True, upsample=upsample)

        self.deconv1 = nn.Sequential(
            nn.ConvTranspose2d(4 * self.gf_dim, 2 * self.gf_dim, 4 + up,
                               stride=2 - up, padding=1 + up),
            nn.LeakyReLU(negative_slope=0.2))
        self.deconv2 = nn.Sequential(
            nn.ConvTranspose2d(2 * self.gf_dim, self.gf_dim, 4 + up,
                               stride=2 - up, padding=1 + up),
            nn.LeakyReLU(negative_slope=0.2))
        self.deconv3 = nn.Sequential(
            nn.ConvTranspose2d(self.gf_dim, self.gf_dim, 3 + up,
                               stride=1 + up, padding=1),
            nn.LeakyReLU(negative_slope=0.2))
        self.deconv4 = nn.Sequential(
            nn.ConvTranspose2d(self.gf_dim, self.gf_dim, 4,
                               stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.2))

        if upsample:
            # One more stride-2 stage before projecting to c_dim channels.
            self.deconv5 = nn.Sequential(
                nn.ConvTranspose2d(self.gf_dim, self.gf_dim, 4,
                                   stride=2, padding=1))
            self.deconv6 = nn.Sequential(
                nn.ConvTranspose2d(self.gf_dim, self.c_dim, 3,
                                   stride=1, padding=1))
        else:
            self.deconv5 = nn.Sequential(
                nn.ConvTranspose2d(self.gf_dim, self.c_dim, 3,
                                   stride=1, padding=1))

        # Chain all deconv stages (five, or six with upsample) in order.
        stages = [getattr(self, 'deconv%u' % i) for i in range(1, 6 + up)]
        self.upsample_net = nn.Sequential(*stages)
Ejemplo n.º 11
0
    def __init__(
            self,
            x_max=5.,
            y_max=5.,
            n_objects=2,
            backgd_dim=30,
            obj_dim=30,
            xy_mod=0.5,
            min_obj_num=3,
            fixed_objs=False,
            ablation_xy=False,
            coeff_xy=1.,
            obj_size=4,
            upsample=False,
            custom_param_groups=True,
            loss_weights={
                'l_gen': 1.,
                'l_gen_eval': 1.,
                'l_disc': 1.,
                'l_xy': 0.,
                'l_num': 0.,
                'l_style_disc': 0.,
                'l_style_gen': 0.,
                'l_style_gen_eval': 0.,
                'l_gradient': 0.,
            },
            log_vars=[
                'objective',
                'l_gen',
                'l_gen_eval',
                'l_disc',
                'l_style_disc',
                'l_style_gen',
                'l_style_gen_eval',
                'l_xy',
                'l_gradient',
            ],
            **kwargs):
        """Build the static RELATE model.

        Creates the learnable background/foreground seed tensors, the
        generator, the discriminator, the ``Gamma`` network and the BCE loss,
        then applies ``weight_init`` to all sub-modules.

        Args:
            n_objects: With ``fixed_objs`` truthy, the foreground seed has
                ``n_objects`` entries along its first axis, otherwise one.
            backgd_dim, obj_dim: Stored as ``self.latent_dims`` and passed to
                the generator and to ``Gamma``.
            fixed_objs: See ``n_objects``.
            obj_size: Spatial side of the foreground seed tensor.
            upsample: Doubles the spatial side of the background seed
                (16 -> 32) and is forwarded to generator and discriminator.
            loss_weights: ``'l_gradient' > 0`` sets ``self.grad_pen``.
            **kwargs: Accepted and ignored by this method.
            (all other arguments): Not read here; presumably stored on the
                instance by ``auto_init_args`` (defined elsewhere).
        """
        super(RELATEStat, self).__init__()

        # autoassign constructor params to self
        auto_init_args(self)

        # Use gpu if available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.latent_dims = [self.backgd_dim, self.obj_dim]
        self.grad_pen = (loss_weights.get('l_gradient', 0) > 0.)

        # Background seed: 16x16, or 32x32 when upsampling is enabled.
        bg_side = 16 * (1 + int(self.upsample))
        self.zbg = Parameter(0.02 * torch.randn(
            [1, 4 * 64, bg_side, bg_side], device=self.device))

        # Foreground seed: one entry, or one per object with fixed_objs.
        n_fg = 1 + int(fixed_objs) * (n_objects - 1)
        self.zfg = Parameter(0.02 * torch.randn(
            [n_fg, 8 * 64, self.obj_size, self.obj_size],
            device=self.device))

        # Init generator
        self.generator = GAN_gen(
            latent_dims=[self.latent_dims[0], self.latent_dims[1]],
            upsample=self.upsample)

        # Init discriminator
        self.discriminator = GAN_disc(pos_dim=2, upsample=self.upsample)

        # Gamma
        self.Gamma = NPE(self.latent_dims[1], self.latent_dims[0])

        # BCELoss init
        self.loss_bce = torch.nn.BCEWithLogitsLoss()

        # Init weights
        self.apply(weight_init)
Ejemplo n.º 12
0
	def __init__( self, 
				  trunk_arch='resnet50', 
				  n_upsample=2, 
				  hc_layers=[1,2,3,4], 
				  hcdim=512, 
				  pose_confidence=True,
				  depth_offset=0., 
				  smooth=False, 
				  encode_input_keypoints = False,
				  kp_encoding_sig=1., 
				  dimout=1,
				  dimout_glob = 0,
				  dimout_glob_alpha = 0,
				  n_keypoints=12, 
				  architecture='hypercolumns',
				  dilate_start=2, 
				  glob_inst_norm=False,
				  final_std=0.01,
				  final_bias=-1.,
				  glob_activation=True,
				  pretrained=True ):
		"""Build a dense prediction network on top of a torchvision trunk.

		The trunk is ``getattr(torchvision.models, trunk_arch)`` and its
		``conv1``/``bn1``/``relu``/``maxpool`` become ``self.layer0``.

		* ``architecture == 'hypercolumns'``: trunk ``layer1..layer4`` are
		  attached to ``self``; one ``hc_layer<k>`` head is built per entry of
		  ``hc_layers`` and a ``final`` head maps ``hcdim`` to ``dimout``.
		* ``architecture == 'dilated'``: trunk layers from ``dilate_start``
		  on are modified in place (stride 1, dilated 3x3 kernels) and all
		  layers plus a final transposed convolution form ``self.trunk``.
		* Any other value builds only ``layer0`` and the ``mean``/``std``
		  tensors.

		Args:
			trunk_arch: Name of the torchvision model constructor.
			hc_layers: Trunk layer indices that get a hypercolumn head.
			hcdim: Channel width of the hypercolumn heads.
			smooth: When truthy, heads are a single convolution instead of
				conv/batch-norm/ReLU/conv.
			encode_input_keypoints: Adds ``n_keypoints`` input channels
				(to every head, or to the first trunk convolution in the
				dilated variant).
			dimout: Number of output channels of the final layer.
			dimout_glob: Must be 0 for the dilated variant
				(``NotImplementedError`` otherwise).
			dilate_start: First trunk layer index that gets dilated.
			final_std: Std of the normal init of the final layer weights.
			final_bias: Final bias fill value; only applied when ``> -1``.
			pretrained: Forwarded to the torchvision constructor.

		NOTE(review): ``n_upsample``, ``pose_confidence``, ``depth_offset``,
		``kp_encoding_sig``, ``dimout_glob_alpha``, ``glob_inst_norm`` and
		``glob_activation`` are not read by the live code below; presumably
		``auto_init_args`` stores them for use elsewhere -- confirm.
		"""
		super().__init__()

		auto_init_args(self)

		trunk = getattr(torchvision.models,trunk_arch)(pretrained=pretrained)
		# nfc = trunk.fc.in_features

		# stem of the trunk, reused unchanged
		self.layer0 = torch.nn.Sequential( trunk.conv1,
										   trunk.bn1,
										   trunk.relu,
										   trunk.maxpool )

		if self.architecture=='hypercolumns':

			# expose the four trunk stages as layer1..layer4 on this module
			for l in [1, 2, 3, 4]:
				lname = 'layer%d'%l
				setattr(self, lname, getattr(trunk,lname))

			for hcl in hc_layers:
				lname = 'hc_layer%d'%hcl
				# head input width = in_channels of conv1 of the last block
				# of the corresponding trunk stage
				indim = getattr(trunk,'layer%d'%hcl)[-1].conv1.in_channels
				
				# if ((self.dimout_glob + self.dimout_glob_alpha) > 0 \
				# 	and hcl==hc_layers[-1]):
				# 	if not self.smooth:
				# 		glob_layers = [ torch.nn.Conv2d(indim, indim,1,bias=True,padding=0),
				# 						torch.nn.ReLU(),
				# 						nn.Conv2d(indim, self.dimout_glob+self.dimout_glob_alpha, \
				# 						1, bias=True, padding=0) ]
				# 		if self.glob_activation:
				# 			glob_layers.insert(1, \
				# 				torch.nn.InstanceNorm2d(indim) if self.glob_inst_norm \
				# 					else torch.nn.BatchNorm2d(indim))
				# 	else:
				# 		glob_layers  = [ nn.Conv2d(indim, self.dimout_glob+self.dimout_glob_alpha, \
				# 						 1, bias=True, padding=0) ]
				# 	self.final_glob = torch.nn.Sequential(*glob_layers )

				# extra input channels for the encoded keypoints
				if self.encode_input_keypoints:
					indim += self.n_keypoints
				
				if not self.smooth:
					layer_ = torch.nn.Sequential( \
								torch.nn.Conv2d(indim, hcdim, 3, bias=True, padding=1),
								torch.nn.BatchNorm2d(hcdim),
								torch.nn.ReLU(),
								torch.nn.Conv2d(hcdim, hcdim, 3, bias=True, padding=1),
								)
				else:
					layer_ = torch.nn.Sequential( \
								torch.nn.Conv2d(indim, hcdim, 3, bias=True, padding=1),
								)
				setattr(self, lname, layer_)

			if not self.smooth:
				up_layers = [ torch.nn.Conv2d(hcdim,hcdim,3,bias=True,padding=1),
							torch.nn.BatchNorm2d(hcdim),
							torch.nn.ReLU(),
							nn.Conv2d(hcdim, dimout, 3, bias=True, padding=1) ]
			else:
				up_layers = [ nn.Conv2d(hcdim, dimout, 3, bias=True, padding=1) ]

			# custom init of the last convolution: N(0, final_std) weights
			llayer = up_layers[-1]
			llayer.weight.data = \
				llayer.weight.data.normal_(0., self.final_std)
			# the default final_bias of -1 leaves the bias at its default init
			if self.final_bias > -1.:
				llayer.bias.data = \
					llayer.bias.data.fill_(self.final_bias)
			print('hcnet: final bias = %1.2e, final std=%1.2e' % \
						(llayer.bias.data.mean(),
						 llayer.weight.data.std())
						)
			self.final = torch.nn.Sequential(*up_layers)
		

		elif self.architecture=='dilated':

			if self.dimout_glob > 0:
				raise NotImplementedError('not done yet')

			# for l in [1, 2, 3, 4]:
			# 	lname = 'layer%d'%l
			# 	setattr(self, lname, getattr(trunk,lname))

			if self.encode_input_keypoints:
				# widen the first convolution in place: append n_keypoints
				# input channels with small random weights (std 1e-4)
				c1 = self.layer0[0]
				wsz = list(c1.weight.data.shape)
				wsz[1] = self.n_keypoints
				c1_add = c1.weight.data.new_zeros( wsz ).normal_(0.,0.0001)
				c1.weight.data = torch.cat( (c1.weight.data, c1_add), dim=1 )
				c1.in_channels += self.n_keypoints

			layers = [self.layer0]

			# li counts the stages dilated so far; dilation is (li+2)**2
			li = 0
			for l in [1,2,3,4]:
				lname = 'layer%d'%l
				m = getattr(trunk,lname)
				if l >= self.dilate_start:
					# in-place edit of the trunk convolutions: remove striding
					# and dilate (and pad) every 3x3 kernel
					for mm in m.modules():
						if type(mm) == torch.nn.Conv2d:
							mm.stride = (1,1)
							if mm.kernel_size==(3,3):
								dil = (li+2)**2
								mm.dilation = ( dil, dil )
								mm.padding  = ( dil, dil )
					li += 1
				layers.append(m)
				# setattr(self, lname, m)

			# lastdim ends up as out_channels of the last module (in
			# .modules() order) of the final block that has that attribute
			for m in layers[-1][-1].modules():
				if hasattr(m, 'out_channels'):
					lastdim = m.out_channels

			# NOTE(review): the condition is hard-coded, so the else branch
			# below is never executed.
			if True: # deconv for final layer (2x higher resol)
				layers.append( torch.nn.ConvTranspose2d( \
					lastdim, dimout, kernel_size=3, \
					stride=2, output_padding=1, padding=1, bias=True) )
			else: # classic conv
				layers.append( torch.nn.Conv2d( \
					lastdim, dimout, kernel_size=3, \
					stride=1, padding=1, bias=True) )
			layers[-1].weight.data = \
				layers[-1].weight.data.normal_(0., self.final_std)
			
			self.trunk = torch.nn.Sequential(*layers )

		# NOTE(review): these look like the usual ImageNet per-channel
		# normalization statistics; they are assigned as plain tensor
		# attributes here -- confirm how they are moved to the right device.
		self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
		self.std = torch.FloatTensor([0.229, 0.224, 0.225])
Ejemplo n.º 13
0
    def __init__(
            self,
            x_max=5.,
            y_max=5.,
            n_objects=2,
            backgd_dim=30,
            obj_dim=30,
            seq_len=5,
            fixed_objs=False,
            obj_size=6,
            past=3,
            len_ev=None,
            custom_param_groups=True,
            loss_weights={
                'l_gen': 1.,
                'l_gen_eval': 1.,
                'l_disc': 1.,
                'l_xy': 0.,
                'l_style_disc': 0.,
                'l_style_gen': 0.,
                'l_style_gen_eval': 0.,
                'l_gradient': 0.,
            },
            log_vars=[
                'objective',
                'l_gen',
                'l_gen_eval',
                'l_disc',
                'l_style_disc',
                'l_style_gen',
                'l_style_gen_eval',
                'l_xy',
                'l_gradient',
            ],
            **kwargs):
        """Build the video RELATE model.

        Creates the learnable background/foreground seed tensors, the
        generator, the discriminator, the ``Gamma`` network, the speed and
        position MLPs and the BCE loss, then applies ``weight_init`` to all
        sub-modules.

        Args:
            n_objects: With ``fixed_objs`` truthy, the foreground seed has
                ``n_objects`` entries along its first axis, otherwise one.
            backgd_dim, obj_dim: Stored as ``self.latent_dims``.
            seq_len: Forwarded to the discriminator as ``first``.
            fixed_objs: See ``n_objects``; also zeroes the appearance part
                of the ``Gamma`` input/output sizes.
            obj_size: Spatial side of the foreground seed tensor.
            past: Enters the ``Gamma`` input size and sets the speed MLP
                output size to ``2 * past``.
            loss_weights: ``'l_gradient' > 0`` sets ``self.grad_pen``.
            **kwargs: Accepted and ignored by this method.
            (all other arguments): Not read here; presumably stored on the
                instance by ``auto_init_args`` (defined elsewhere).
        """
        super(RELATEVideo, self).__init__()

        # autoassign constructor params to self
        auto_init_args(self)

        # Use gpu if available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.latent_dims = [self.backgd_dim, self.obj_dim]
        self.grad_pen = (loss_weights.get('l_gradient', 0) > 0.)

        d_bg = self.latent_dims[0]
        d_obj = self.latent_dims[1]

        # Learnable seed tensors for background and foreground.
        bg_seed = 0.02 * torch.randn([1, 4 * 64, 16, 16], device=self.device)
        self.zbg = Parameter(bg_seed)

        n_fg = 1 + self.fixed_objs * (self.n_objects - 1)
        fg_seed = 0.02 * torch.randn(
            [n_fg, 8 * 64, self.obj_size, self.obj_size], device=self.device)
        self.zfg = Parameter(fg_seed)

        # Init generator (object latent is extended by two entries)
        self.generator = GAN_gen(latent_dims=[d_bg, d_obj + 2])

        # Init discriminator
        self.discriminator = GAN_disc(pos_dim=2, first=self.seq_len)

        # Init Gamma
        free_objs = 1 - int(self.fixed_objs)
        n_prev = self.past - 1
        gamma_in = d_obj + n_prev * d_obj * free_objs + 2 * n_prev
        self.Gamma = NPE(gamma_in, d_bg, out_dim=d_obj * free_objs)

        # Init misc MLPs
        self.mlp_speed = nn.Sequential(
            nn.Linear(d_obj + 2 + d_bg, 128),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(128, 128),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(128, 2 * past),
            nn.Tanh())

        self.mlp_xy = NPE(d_obj, d_bg)

        # BCELoss init
        self.loss_bce = torch.nn.BCEWithLogitsLoss()

        # Init weights
        self.apply(weight_init)
Ejemplo n.º 14
0
    def __init__(
            self,
            x_max=5.,
            y_max=5.,
            n_objects=2,
            backgd_dim=30,
            obj_dim=30,
            offset_mlp=0.3,
            ablation_xy=False,
            custom_param_groups=True,
            loss_weights={
                'l_gen': 1.,
                'l_gen_eval': 1.,
                'l_disc': 1.,
                'l_xy': 0.,
                'l_style_disc': 0.,
                'l_style_gen': 0.,
                'l_style_gen_eval': 0.,
                'l_gradient': 0.,
            },
            log_vars=[
                'objective', 'l_gen', 'l_gen_eval', 'l_disc', 'l_style_disc',
                'l_style_gen', 'l_style_gen_eval', 'l_xy', 'l_gradient'
            ],
            **kwargs):
        """Build the ordered static RELATE model.

        Creates the learnable background/foreground seed tensors, the
        generator, the discriminator, two position-predicting MLPs and the
        BCE loss, then applies ``weight_init`` to all sub-modules.

        Args:
            backgd_dim, obj_dim: Stored as ``self.latent_dims``; their sum
                (plus 2 for ``mlp_xy``) is the MLP input size.
            loss_weights: ``'l_gradient' > 0`` sets ``self.grad_pen``.
            **kwargs: Accepted and ignored by this method.
            (all other arguments): Not read here; presumably stored on the
                instance by ``auto_init_args`` (defined elsewhere).
        """
        super(RELATEStatOrder, self).__init__()

        # autoassign constructor params to self
        auto_init_args(self)

        def xy_head(n_in):
            # n_in -> 128 -> 64 -> 2 MLP with LeakyReLU(0.2) in between.
            return nn.Sequential(nn.Linear(n_in, 128),
                                 nn.LeakyReLU(negative_slope=0.2),
                                 nn.Linear(128, 64),
                                 nn.LeakyReLU(negative_slope=0.2),
                                 nn.Linear(64, 2))

        # Use gpu if available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.latent_dims = [self.backgd_dim, self.obj_dim]
        self.grad_pen = (loss_weights.get('l_gradient', 0) > 0.)

        # Learnable seed tensors for background and foreground.
        bg_seed = 0.02 * torch.randn([1, 4 * 64, 16, 16], device=self.device)
        self.zbg = Parameter(bg_seed)
        fg_seed = 0.02 * torch.randn([1, 8 * 64, 4, 4], device=self.device)
        self.zfg = Parameter(fg_seed)

        # Init generator
        self.generator = GAN_gen(latent_dims=self.latent_dims)

        # Init discriminator
        self.discriminator = GAN_disc(pos_dim=2)

        # Position predictors
        self.mlp_xy = xy_head(sum(self.latent_dims) + 2)
        self.mlp_xy_rec = xy_head(sum(self.latent_dims))

        # BCELoss init
        self.loss_bce = torch.nn.BCEWithLogitsLoss()

        # Init weights
        self.apply(weight_init)