Example #1
    def __init__(self, x, layers, num_components=100, device=None, old=False):
        super(VAE_bodies, self).__init__()

        self.device = device

        self.p = int(layers[0])  # Dimension of x
        self.d = int(layers[-1])  # Dimension of z
        self.h = layers  # Full layer specification (the hidden layers are layers[1:-1])
        self.num_components = num_components

        enc = []
        for k in range(len(layers) - 1):
            in_features = int(layers[k])
            out_features = int(layers[k + 1])
            enc.append(
                nnj.ResidualBlock(nnj.Linear(in_features, out_features),
                                  nnj.Softplus()))
        enc.append(nnj.Linear(out_features, int(self.d * 2)))

        dec = []
        for k in reversed(range(len(layers) - 1)):
            in_features = int(layers[k + 1])
            out_features = int(layers[k])
            if not old:  # temporary to load old models TODO: delete
                if out_features != layers[0]:
                    dec.append(
                        nnj.ResidualBlock(
                            nnj.Linear(in_features, out_features),
                            nnj.Softplus()))
                else:
                    dec.append(
                        nnj.ResidualBlock(
                            nnj.Linear(in_features, out_features),
                            nnj.Sigmoid()))
            else:
                dec.append(
                    nnj.ResidualBlock(nnj.Linear(in_features, out_features),
                                      nnj.Softplus()))
                if out_features == layers[0]:
                    dec.append(nnj.Sigmoid())

        # Note how we use 'nnj' instead of 'nn' -- this gives automatic
        # computation of Jacobians of the implemented neural network.
        # The embed function is required to also return Jacobians if
        # requested; by using 'nnj' this becomes a trivial constraint.
        self.encoder = nnj.Sequential(*enc)

        self.decoder_loc = nnj.Sequential(*dec)
        self.init_decoder_scale = 0.01 * torch.ones(self.p, device=self.device)

        self.prior_loc = torch.zeros(self.d, device=self.device)
        self.prior_scale = torch.ones(self.d, device=self.device)
        self.prior = td.Independent(
            td.Normal(loc=self.prior_loc, scale=self.prior_scale), 1)

        # Create a blank std-network.
        # It is important to call init_std after training the mean, but before training the std
        self.dec_std = None

        self.to(self.device)
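
For context, a minimal construction sketch follows. It assumes torch, torch.distributions as td, and the nnj module are importable and that the VAE_bodies class above is in scope; the layer specification is illustrative, and x is passed as a placeholder because the shown constructor does not use it.

    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # layers[0] is the data dimension p, layers[-1] the latent dimension d.
    layers = [784, 256, 64, 2]  # hypothetical layer specification

    # x is accepted by __init__ but unused in the snippet shown, so None is passed here.
    model = VAE_bodies(x=None, layers=layers, num_components=100, device=device)

    # encoder maps p -> 2*d (mean and log-variance); decoder_loc maps d -> p.
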
Example #2
    def init_std(self,
                 x,
                 gmm_mu=None,
                 gmm_cv=None,
                 weights=None,
                 beta_constant=0.5,
                 beta_override=None,
                 inv_maxstd=7.5e-2,
                 n_samples=2,
                 num_components=None):
        self.beta_constant = beta_constant
        if num_components is not None:
            self.num_components = num_components
        N, D = x.shape
        with torch.no_grad():
            z = self.encode(x.to(self.device)).sample([n_samples]).reshape(
                n_samples * N, self.d)
        d = z.shape[1]

        if gmm_mu is None and gmm_cv is None and weights is None:
            from sklearn import mixture
            clf = mixture.GaussianMixture(n_components=self.num_components,
                                          covariance_type='spherical')
            clf.fit(z.cpu().numpy())
            self.gmm_means = clf.means_
            self.gmm_covariances = clf.covariances_
            self.clf_weights = clf.weights_
        else:
            print('loading weights...')
            self.gmm_means = gmm_mu
            self.gmm_covariances = gmm_cv
            self.clf_weights = weights
        if beta_override is None:
            self.beta = beta_constant / torch.tensor(
                self.gmm_covariances, dtype=torch.float, requires_grad=False)
        else:
            self.beta = beta_override
        self.beta = self.beta.to(self.device)
        self.dec_std = nnj.Sequential(
            nnj.RBF(d,
                    self.num_components,
                    points=torch.tensor(self.gmm_means,
                                        dtype=torch.float,
                                        requires_grad=False),
                    beta=self.beta),  # d --> num_components
            nnj.PosLinear(self.num_components, 1,
                          bias=False),  # num_components --> 1
            nnj.Reciprocal(inv_maxstd),  # 1 --> 1
            nnj.PosLinear(1, D)).to(self.device)  # 1 --> D
        with torch.no_grad():
            self.dec_std[1].weight[:] = (
                (torch.tensor(self.clf_weights, dtype=torch.float).exp() -
                 1.0).log()).to(self.device)
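
A hedged usage sketch of the two-stage workflow implied here; model is assumed to be a VAE_bodies instance whose mean decoder has already been trained, and x a data tensor of shape (N, p).

    # Stage 1 (assumed done elsewhere): train model.encoder and model.decoder_loc.
    # Stage 2: fit the RBF-based std-network on latent codes of the training data.
    model.init_std(x, beta_constant=0.5, inv_maxstd=7.5e-2, n_samples=2)

    # model.dec_std now maps latent codes to per-dimension standard deviations.
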
Example #3
    def init_std_naive(self):
        dec = [
            nnj.Linear(self.latent_space, self.hidden_layer[-1]),
            nnj.Softplus()
        ]
        for i in reversed(range(1, len(self.hidden_layer))):
            dec.append(
                nnj.ResidualBlock(
                    nnj.Linear(self.hidden_layer[i], self.hidden_layer[i - 1]),
                    nnj.Softplus()))
        dec.append(nnj.Linear(self.hidden_layer[0], 784))
        self.decoder_std = nnj.Sequential(*dec).to(self.device)
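
A brief sketch of how this naive std-network might be used once initialised, assuming a model instance that exposes latent_space, hidden_layer, and device attributes (such as the BasicVAE in Example #4); the latent batch is illustrative.

    model.init_std_naive()

    z = torch.randn(16, model.latent_space, device=model.device)  # hypothetical latent batch
    raw_std = model.decoder_std(z)  # shape (16, 784); the final nnj.Linear applies no positivity constraint
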
Example #4
    def __init__(self, hidden_layer=[512, 256], latent_space=2):
        super(BasicVAE, self).__init__()

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.hidden_layer = hidden_layer
        self.latent_space = latent_space
        self.prior_loc = torch.zeros(latent_space, device=self.device)
        self.prior_scale = torch.ones(latent_space, device=self.device)
        self.prior = td.Independent(
            td.Normal(loc=self.prior_loc, scale=self.prior_scale), 1)

        enc = [
            nnj.ResidualBlock(nnj.Linear(784, hidden_layer[0]), nnj.Softplus())
        ]
        for i in range(len(hidden_layer) - 1):
            enc.append(
                nnj.ResidualBlock(
                    nnj.Linear(hidden_layer[i], hidden_layer[i + 1]),
                    nnj.Softplus()))
        enc.append(nnj.Linear(hidden_layer[-1], int(latent_space * 2)))
        self.encoder = nnj.Sequential(*enc)

        dec = [
            nnj.ResidualBlock(nnj.Linear(latent_space, hidden_layer[0]),
                              nnj.Softplus())
        ]
        for i in reversed(range(1, len(hidden_layer))):
            dec.append(
                nnj.ResidualBlock(
                    nnj.Linear(hidden_layer[i], hidden_layer[i - 1]),
                    nnj.Softplus()))
        dec.append(
            nnj.ResidualBlock(nnj.Linear(hidden_layer[0], 784), nnj.Sigmoid()))
        self.decoder_loc = nnj.Sequential(*dec)

        self.init_decoder_scale = 0.01 * torch.ones(784, device=self.device)
        self.decoder_std = None
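
A construction-and-encoding sketch for BasicVAE follows; it assumes torch, torch.distributions as td, and nnj are imported, and uses a random batch in place of real flattened 28x28 images.

    model = BasicVAE(hidden_layer=[512, 256], latent_space=2)

    x = torch.rand(32, 784)  # placeholder batch of flattened images
    mu, lv = torch.chunk(model.encoder(x), chunks=2, dim=-1)  # mean and log-variance
    recon = model.decoder_loc(mu)  # mean reconstruction in [0, 1] via the final Sigmoid
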
Example #5
    def load_std(self,
                 x,
                 gmm_mu=None,
                 gmm_cv=None,
                 weights=None,
                 inv_maxstd=1e-1,
                 beta_constant=0.5,
                 beta_values=None,
                 z_override=None,
                 sigma=None):
        """ messy, needs clean separation between init and load """
        N, D = x.shape

        print('loading weights...')
        self.gmm_means = gmm_mu
        self.gmm_covariances = gmm_cv
        self.clf_weights = weights
        d = self.gmm_means.shape[1]

        if beta_values is None:
            # beta_constant may be a plain float (the default 0.5) or a tensor;
            # normalise it to a CPU tensor before dividing.
            beta_constant = torch.as_tensor(beta_constant, dtype=torch.float).cpu()
            beta = beta_constant / torch.tensor(
                self.gmm_covariances, dtype=torch.float, requires_grad=False)
        else:
            beta = beta_values
        self.beta = beta.to(self.device)
        self.dec_std = nnj.Sequential(
            nnj.RBF(d,
                    self.num_components,
                    points=torch.tensor(self.gmm_means,
                                        dtype=torch.float,
                                        requires_grad=False),
                    beta=self.beta),  # d --> num_components
            nnj.PosLinear(self.num_components, 1,
                          bias=False),  # num_components --> 1
            nnj.Reciprocal(inv_maxstd),  # 1 --> 1
            nnj.PosLinear(1, D)).to(self.device)  # 1 --> D
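
A sketch of reloading previously fitted GMM parameters into the std-network; the saved_* arrays are placeholders for values stored from an earlier init_std run, and model is assumed to already have num_components and device set.

    model.load_std(x,
                   gmm_mu=saved_means,        # (num_components, d) GMM means
                   gmm_cv=saved_covariances,  # (num_components,) spherical covariances
                   weights=saved_weights,     # (num_components,) mixture weights
                   inv_maxstd=1e-1,
                   beta_constant=0.5)
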
Example #6
    def init_std(self,
                 x,
                 gmm_mu=None,
                 gmm_cv=None,
                 weights=None,
                 inv_maxstd=1e-1,
                 beta_constant=0.5,
                 component_overwrite=None,
                 beta_override=None,
                 n_samples=2,
                 z_override=None,
                 sigma=None):
        if component_overwrite is not None:
            self.num_components = component_overwrite
        if z_override is None:
            with torch.no_grad():
                mu, lv = torch.chunk(self.encoder(x.to(self.device)),
                                     chunks=2,
                                     dim=-1)
                z = td.Normal(loc=mu, scale=lv.mul(0.5).exp() + 1e-10).sample(
                    [n_samples])
                z = z.reshape(int(x.shape[0] * n_samples), z.shape[-1])
        else:
            z = z_override
        N, D = x.shape
        d = z.shape[1]

        if gmm_mu is None and gmm_cv is None and weights is None:
            from sklearn import mixture
            clf = mixture.GaussianMixture(n_components=self.num_components,
                                          covariance_type='spherical')
            clf.fit(z.cpu().numpy())
            self.gmm_means = clf.means_
            self.gmm_covariances = clf.covariances_
            self.clf_weights = clf.weights_
        else:
            print('loading weights...')
            self.gmm_means = gmm_mu
            self.gmm_covariances = gmm_cv
            self.clf_weights = weights
        if beta_override is None:
            # beta_constant may be a plain float (the default 0.5) or a tensor;
            # normalise it to a CPU tensor before dividing.
            beta_constant = torch.as_tensor(beta_constant, dtype=torch.float).cpu()
            beta = beta_constant / torch.tensor(
                self.gmm_covariances, dtype=torch.float, requires_grad=False)
        else:
            beta = beta_override
        self.beta = beta.to(self.device)
        self.dec_std = nnj.Sequential(
            nnj.RBF(d,
                    self.num_components,
                    points=torch.tensor(self.gmm_means,
                                        dtype=torch.float,
                                        requires_grad=False),
                    beta=self.beta),  # d --> num_components
            nnj.PosLinear(self.num_components, 1,
                          bias=False),  # num_components --> 1
            nnj.Reciprocal(inv_maxstd),  # 1 --> 1
            nnj.PosLinear(1, D)).to(self.device)  # 1 --> D
        if sigma is not None:
            self.dec_std[0] = nnj.RBF_variant(
                d,
                self.gmm_means.shape[0],
                points=torch.tensor(self.gmm_means,
                                    dtype=torch.float,
                                    requires_grad=False),
                beta=self.beta.requires_grad_(False),
                boxwidth=sigma).to(self.device)
        with torch.no_grad():
            self.dec_std[1].weight[:] = (
                (torch.tensor(self.clf_weights, dtype=torch.float).exp() -
                 1.0).log()).to(self.device)
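
What distinguishes this variant is the optional sigma argument, which swaps the plain nnj.RBF for an nnj.RBF_variant with a fixed boxwidth. A hedged sketch of that path, with model and x assumed as in Example #2 and a placeholder sigma value:

    model.init_std(x,
                   beta_constant=0.5,
                   inv_maxstd=1e-1,
                   n_samples=2,
                   sigma=0.1)  # placeholder boxwidth; the first layer becomes nnj.RBF_variant
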