Example #1
 def check_gaussian_kl_divergence(self, mean, ln_var):
     if self.wrap_m:
         mean = chainer.Variable(mean)
     if self.wrap_v:
         ln_var = chainer.Variable(ln_var)
     actual = cuda.to_cpu(
         F.gaussian_kl_divergence(mean, ln_var, self.reduce).data)
     testing.assert_allclose(self.expect, actual)
Example #3
    def __call__(self, x, test=False, k=4):

        batch_size = x.data.shape[0]
        w = x.data.shape[2]
        tr, tg, tb = chainer.functions.split_axis(x, 3, 1)
        tr = F.reshape(tr, (batch_size * w * w, ))
        tg = F.reshape(tg, (batch_size * w * w, ))
        tb = F.reshape(tb, (batch_size * w * w, ))

        x = chainer.Variable(x.data.astype('f'))

        z_mu, z_var = self.enc(x, test)
        loss_kl = F.gaussian_kl_divergence(z_mu, z_var) / batch_size / self.k

        loss_decode = 0
        for _ in range(k):
            z = F.gaussian(z_mu, z_var)
            r, g, b = self.dec(z, test)
            r = F.transpose(r, (0, 2, 3, 1))
            r = F.reshape(r, (batch_size * w * w, 256))
            g = F.transpose(g, (0, 2, 3, 1))
            g = F.reshape(g, (batch_size * w * w, 256))
            b = F.transpose(b, (0, 2, 3, 1))
            b = F.reshape(b, (batch_size * w * w, 256))
            loss_decode += F.softmax_cross_entropy(r, tr) / k
            loss_decode += F.softmax_cross_entropy(g, tg) / k
            loss_decode += F.softmax_cross_entropy(b, tb) / k

        chainer.report({'loss_kl': loss_kl, 'loss_decode': loss_decode}, self)

        beta = 0.2
        return beta * loss_kl + (1 - beta) * loss_decode
Example #4
    def pretrain_step_vrae(self, x_input):
        """
        Maximum likelihood estimation

        :param x_input:
        :return: loss
        """
        batch_size = len(x_input)
        _, mu_z, ln_var_z = self.encoder.encode(x_input)

        z = F.gaussian(mu_z, ln_var_z)

        self.reset_state()
        accum_loss = 0
        self.lstm1.h = z
        for i in range(self.sequence_length):
            if i == 0:
                x = chainer.Variable(
                    self.xp.asanyarray([self.start_token] * batch_size,
                                       'int32'))
            else:
                x = chainer.Variable(
                    self.xp.asanyarray(x_input[:, i - 1], 'int32'))

            scores = self.decode_one_step(x)
            loss = F.softmax_cross_entropy(
                scores,
                chainer.Variable(self.xp.asanyarray(x_input[:, i], 'int32')))
            accum_loss += loss

        dec_loss = accum_loss / self.sequence_length
        kl_loss = F.gaussian_kl_divergence(mu_z, ln_var_z) / batch_size
        return dec_loss, kl_loss
Example #5
 def sample_g0(self, zs):
     mu = self.l_g0_mu(zs)
     ln_var = self.l_g0_ln_var(zs)
     g_0 = F.gaussian(mu, ln_var)
     batchsize = len(mu.data)
     kl_g0 = gaussian_kl_divergence(mu, ln_var) / batchsize
     return g_0, kl_g0
Example #6
    def update_core(self):
        batch = self._iterators['main'].next()
        x = Variable(self.converter(batch, self.device))
        xp = cuda.get_array_module(x.data)

        enc = self.enc
        opt_enc = self._optimizers['enc']
        dec = self.dec
        opt_dec = self._optimizers['dec']

        mu, ln_var = enc(x)

        batchsize = len(mu.data)
        rec_loss = 0
        k = 10
        for l in range(k):
            z = F.gaussian(mu, ln_var)
            rec_loss += F.bernoulli_nll(x, dec(
                z, sigmoid=False)) / (k * batchsize)

        loss = rec_loss + 1.0 * F.gaussian_kl_divergence(mu,
                                                         ln_var) / batchsize

        enc.cleargrads()
        dec.cleargrads()
        loss.backward()
        opt_enc.update()
        opt_dec.update()

        chainer.report({'rec_loss': rec_loss})
        chainer.report({'loss': loss})
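For orientation: Example #6 above, like most of the VAE snippets in this listing, minimizes a Monte Carlo estimate of the negative evidence lower bound. Per batch, what the code computes is (a sketch, not additional source):

    loss = (1/batchsize) * [ (1/k) * sum_{l=1..k} bernoulli_nll(x, dec(z_l)) + gaussian_kl_divergence(mu, ln_var) ],   z_l ~ N(mu, exp(ln_var))

The first term is the expected reconstruction negative log-likelihood estimated with k samples from F.gaussian; the second is the KL divergence of the approximate posterior from the standard normal prior.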
Example #7
    def _forward(self, batch, test=False):

        # Encode and decode the training set
        encoded, means, ln_vars = self._encode(batch, test=test)
        rec = self._decode(encoded, test=test)
        normer = reduce(lambda x, y: x * y, means.data.shape)  # total number of elements
        kl_loss = F.gaussian_kl_divergence(means, ln_vars)/normer
        #print 'means={}'.format(means.data.shape)
        #print 'ln_vars={}'.format(ln_vars.data.shape)
        #print 'kl_loss={}, normer={}'.format(kl_loss.data, normer)

        # sample z from the prior N(0, 1)
        samp_p = np.random.standard_normal(means.data.shape).astype('float32')
        z_p = chainer.Variable(samp_p)

        if self.flag_gpu:
            z_p.to_gpu()

        rec_p = self._decode(z_p)

        disc_rec, conv_layer_rec = self.disc(rec, test=test, dropout_ratio=self.dropout_ratio)

        disc_batch, conv_layer_batch = self.disc(batch, test=test, dropout_ratio=self.dropout_ratio)

        disc_x_p, conv_layer_x_p = self.disc(rec_p, test=test, dropout_ratio=self.dropout_ratio)

        dif_l = F.mean_squared_error(conv_layer_rec, conv_layer_batch)

        return kl_loss, dif_l, disc_rec, disc_batch, disc_x_p
Example #8
    def reconst(self, data, unregular=False):
        if data.ndim == 1:
            data = data.reshape(1,-1)

        with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
            e_mu,e_var = self.encode(Variable(data))
            feat = F.gaussian(e_mu, e_var).data
            d_out = self.decode(Variable(feat))

        if self.is_gauss_dist:
            rec = d_out[0].data
            if unregular:  # non-regularized term
                d_mu, d_var = d_out
                D_VAE = F.gaussian_kl_divergence(e_mu, e_var)
                A_VAE = 0.5* ( np.log(2*np.pi) + d_var )
                M_VAE = 0.5* ( data-d_mu )**2 * F.exp(-d_var)
                return feat, rec, M_VAE.data
        else:
            rec = F.sigmoid(d_out).data
        mse = np.mean( (rec-data)**2, axis=1 )

        # lat_loss = F.gaussian_kl_divergence(e_mu, e_var)
        # rec_loss = F.bernoulli_nll( Variable(data), d_out )
        # vae_err = (lat_loss+rec_loss).data

        return feat, rec, mse
Example #9
 def lf(frames):
     mu, ln_var = self.encode(frames)
     z = F.gaussian(mu, ln_var)
     frames_flat = F.reshape(
         frames,
         (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
     variational_flat = F.reshape(
         self.decode(z),
         (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
     rec_loss = F.sum(F.square(frames_flat - variational_flat),
                      axis=1)  # l2 reconstruction loss
     rec_loss = F.mean(rec_loss)
     kl_loss = F.sum(F.gaussian_kl_divergence(mu, ln_var, reduce="no"),
                     axis=1)
     if self._cpu:
         kl_tolerance = np.asarray(self.kl_tolerance *
                                   self.n_latent).astype(np.float32)
     else:
         kl_tolerance = cp.asarray(self.kl_tolerance *
                                   self.n_latent).astype(cp.float32)
     kl_loss = F.maximum(kl_loss,
                         F.broadcast_to(kl_tolerance, kl_loss.shape))
     kl_loss = F.mean(kl_loss)
     loss = rec_loss + kl_loss
     chainer.report({'loss': loss}, observer=self)
     chainer.report({'kl_loss': kl_loss}, observer=self)
     chainer.report({'rec_loss': rec_loss}, observer=self)
     return loss
Example #10
    def __call__(self, xs, ys):
        eos = self.xp.array([EOS], 'i')

        xs = [self.denoiseInput(x[::-1], self.denoising_rate)
              for x in xs]  # denoising

        #ys_d = [self.wordDropout(y, self.word_dropout) for y in ys] # word dropout
        ys_d = [self.denoiseInput(y, self.word_dropout)
                for y in ys]  # word dropout
        ys_in = [F.concat([eos, y], axis=0) for y in ys_d]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # Both xs and ys_in are lists of arrays.
        exs = sequence_embed(self.embed_x, xs)
        eys = sequence_embed(self.embed_y, ys_in)

        batch = len(xs)
        # None represents a zero vector in an encoder.
        hx, at = self.encoder(None, exs)  # layer x batch x n_units
        hx_t = F.transpose(hx, (1, 0, 2))  # batch x layer x n_units
        mu = self.W_mu(hx_t)  # batch x n_latent
        ln_var = self.W_ln_var(hx_t)
        #print('{},{}'.format(mu.shape,ln_var.shape))
        #print(hx_t.shape)

        rec_loss = 0
        concat_ys_out = F.concat(ys_out, axis=0)
        for _ in range(self.k):
            z = F.gaussian(mu, ln_var)
            z_e = F.expand_dims(z, 2)  # batch x n_latent x 1
            Wz = self.W_h(z_e)  # batch x (layer x unit)
            #print('Wz: {}, {}'.format(Wz.shape, type(Wz)))
            hys = F.split_axis(Wz, self.n_layers, 1)  # layer x batch x unit
            #print('hys, {}'.format([x.shape for x in hys]))
            c_hy = F.concat([F.expand_dims(hy, 0) for hy in hys],
                            0)  # layer x batch x unit
            #print('c_hy: {}'.format(c_hy.shape))
            _, os = self.decoder(c_hy, eys)
            #print(len(os))
            concat_os = F.concat(os, axis=0)
            rec_loss += F.sum(
                F.softmax_cross_entropy(self.W(concat_os),
                                        concat_ys_out,
                                        reduce='no')) / (self.k * batch)
        latent_loss = F.gaussian_kl_divergence(mu, ln_var) / batch
        loss = rec_loss + self.C * latent_loss

        # wy = self.W(concat_os)
        # ys = self.xp.argmax(wy.data, axis=1).astype('i')
        # print(ys)

        chainer.report({'loss': loss.data}, self)
        chainer.report({'rec': rec_loss.data}, self)
        chainer.report({'lat': latent_loss.data}, self)
        n_words = concat_ys_out.shape[0]
        perp = self.xp.exp(loss.data * batch / n_words)
        chainer.report({'perp': perp}, self)

        return loss
Example #11
 def free_energy(self, x):
     #return -(free energy)
     enc_mu, enc_log_sigma_2 = self.encode(x)
     kl = F.gaussian_kl_divergence(enc_mu, enc_log_sigma_2)
     z = F.gaussian(enc_mu, enc_log_sigma_2)
     dec_mu = self.decode(z)
     nll = F.bernoulli_nll(x, dec_mu)
     return nll + kl
Example #12
File: vae_model.py  Project: clarken92/vae
 def free_energy(self, x):
     # return -(free energy)
     enc_mu, enc_log_sigma_2 = self.encode(x)
     kl = F.gaussian_kl_divergence(enc_mu, enc_log_sigma_2)
     z = F.gaussian(enc_mu, enc_log_sigma_2)
     dec_mu = self.decode(z)
     nll = F.bernoulli_nll(x, dec_mu)
     return nll + kl
Example #13
    def compute_loss(self, x1, x2, t):
        '''
        Compute both encoder losses and the decoder loss.
        Use KL divergence for the encoders and a Bernoulli loss for the decoder.
        Input: two images and the target (a composition of both images)
        Output: decoder loss, encoder1 loss, encoder2 loss
        '''
        mu1, ln_std1, mu2, ln_std2 = self.encode(x1, x2)
        kl1 = F.gaussian_kl_divergence(mu1, ln_std1)
        kl2 = F.gaussian_kl_divergence(mu2, ln_std2)
        sample1 = F.gaussian(mu1, ln_std1)
        sample2 = F.gaussian(mu2, ln_std2)

        sample = F.concat((sample1, sample2))
        output = self.decode(sample)
        nll = F.bernoulli_nll(F.reshape(t, (t.shape[0], 1, 32, 32)), output)
        return nll / (t.shape[0] * 32 *
                      32), kl1 / (x1.shape[0] * 32), kl2 / (x2.shape[0] * 32)
Example #14
File: vae.py  Project: hillbig/dgen
 def __call__(self, x):
     xp = self.encoder.xp
     x = Variable(xp.asarray(x))
     zm, zv = self.encoder((x,))
     z = F.gaussian(zm, zv)
     mean, ln_var = self.decoder((z,))
     kl_loss = F.gaussian_kl_divergence(zm, zv)
     nll_loss = F.gaussian_nll(x, mean, ln_var)
     loss = kl_loss + nll_loss
     return loss
Example #15
    def forward(self, batch, test=False):

        out, means, ln_vars = self.encode(batch, test=test)
        out = self.decode(out, test=test)
        normer = reduce(lambda x, y: x * y, means.data.shape)

        kl_loss = F.gaussian_kl_divergence(means, ln_vars) / normer
        rec_loss = F.mean_squared_error(batch, out)

        return out, kl_loss, rec_loss
Example #16
 def encode(self, bow):
     """ Convert the bag of words vector of shape (n_docs, n_vocab)
     into latent mean log variance vectors.
     """
     lam = F.relu(self.l1(bow))
     pi = F.relu(self.l2(lam))
     mu, log_sigma = F.split_axis(self.mu_logsigma(pi), 2, 1)
     sample = F.gaussian(mu, log_sigma)
     loss = F.gaussian_kl_divergence(mu, log_sigma)
     return sample, loss
Example #17
def update(net, optimizer, x):
    xp = cuda.get_array_module(x)
    div_weight = 1

    y, mean, var = net(x)
    loss = F.mean_squared_error(x, y) + div_weight * F.gaussian_kl_divergence(mean, var) / float(y.size)
    net.cleargrads()
    loss.backward()
    optimizer.update()
    return loss
Example #18
    def forward(self, batch, test=False):

        out, means, ln_vars = self.encode(batch, test=test)
        out = self.decode(out, test=test)
        normer = reduce(lambda x, y: x * y, means.data.shape)

        kl_loss = F.gaussian_kl_divergence(means, ln_vars) / normer
        rec_loss = F.mean_squared_error(batch, out)

        return out, kl_loss, rec_loss
Example #19
File: vae.py  Project: hillbig/dgen
    def __call__(self, x):
        x = Variable(x)
        start = time.time()
        zm, zv = self.encoder((x,))
        z = F.gaussian(zm, zv)
        y = self.decoder((z,))[0]
        kl_loss = F.gaussian_kl_divergence(zm, zv)
        nll_loss = F.bernoulli_nll(x, y)

        loss = kl_loss + nll_loss
        return loss
Example #20
File: VAE.py  Project: capp365/VAE
 def __call__(self, x, l):
     mu, sigma = self.encoder(x)
     self.KL = F.gaussian_kl_divergence(mu, sigma)
     self.loss = Variable(np.array(0, dtype=np.float32))
     for i in range(l):
         sample = F.gaussian(mu, sigma)
         m, s = self.decoder(sample)
         self.loss += F.gaussian_nll(x, m, s)
     self.loss = self.loss / l + self.KL
     self.loss = self.loss / len(x)
     return self.loss
Example #21
 def get_loss(self, x, y, train=True):
     mu, ln_var = self.encode(x, y)
     batchsize = len(mu.data)
     # reconstruction loss
     rec_loss = 0
     z = F.gaussian(mu, ln_var)
     rec_loss += F.bernoulli_nll(y, self.decode(z, x)) / (batchsize)
     self.rec_loss = rec_loss
     self.loss = self.rec_loss + F.gaussian_kl_divergence(
         mu, ln_var) / batchsize
     return self.loss
Example #22
    def loss(self, x, y):
        batch_size = len(x)
        mu, ln_var = self._latent_distribution(x)

        z = self._sample(mu, ln_var)

        reconstruction_loss = F.mean_squared_error(x, self.decode(z))
        latent_loss = 0.0005 * F.gaussian_kl_divergence(mu,
                                                        ln_var) / batch_size
        loss = reconstruction_loss + latent_loss

        return loss
Example #23
    def pretrain_step_vrae_tag(self,
                               x_input,
                               tag,
                               word_drop_ratio=0.0,
                               train=True):
        """
        Maximum likelihood estimation

        :param x_input:
        :return: loss
        """
        batch_size = len(x_input)
        _, mu_z, ln_var_z = self.encoder.encode_with_tag(x_input, tag, train)

        self.reset_state()

        if self.latent_dim:
            z = F.gaussian(mu_z, ln_var_z)
        else:
            latent = F.gaussian(mu_z, ln_var_z)
            tag_ = self.tag_embed(
                chainer.Variable(self.xp.array(tag, 'int32'),
                                 volatile=not train))
            self.lstm1.h = self.dec_input(F.concat((latent, tag_)))
            z = None

        accum_loss = 0
        for i in range(self.sequence_length):
            if i == 0:
                x = chainer.Variable(self.xp.asanyarray([self.start_token] *
                                                        batch_size, 'int32'),
                                     volatile=not train)
            else:
                if np.random.random() < word_drop_ratio and train:
                    x = chainer.Variable(self.xp.asanyarray(
                        [self.start_token] * batch_size, 'int32'),
                                         volatile=not train)
                else:
                    x = chainer.Variable(self.xp.asanyarray(
                        x_input[:, i - 1], 'int32'),
                                         volatile=not train)

            scores = self.decode_one_step(x, z=z)
            loss = F.softmax_cross_entropy(
                scores,
                chainer.Variable(self.xp.asanyarray(x_input[:, i], 'int32'),
                                 volatile=not train))
            accum_loss += loss

        dec_loss = accum_loss
        kl_loss = F.gaussian_kl_divergence(mu_z, ln_var_z) / batch_size
        return dec_loss, kl_loss
Example #24
    def forward(self, hs):
        data_len = len(hs)

        mu = self.l1_mu(hs)
        ln_var = self.l1_ln_var(hs)

        mu = F.leaky_relu(mu, slope=0.2)
        ln_var = F.leaky_relu(ln_var, slope=0.2)

        zs = F.gaussian(mu, ln_var)
        loss = F.gaussian_kl_divergence(mu, ln_var) / data_len

        return zs, loss
Example #25
 def lf(self, x, mu, ln_var, split=False):
     batchsize = len(mu.data)
     # reconstruction loss
     rec_loss = 0
     for l in range(k):
         z = F.gaussian(mu, ln_var)
         rec_loss += F.bernoulli_nll(x, self.decoder_model(z, sigmoid=False)) / (k * batchsize)
     rec_loss = rec_loss
     kl_loss = C * F.gaussian_kl_divergence(mu, ln_var) / batchsize
     loss = rec_loss +  kl_loss
     if split:
         return rec_loss, kl_loss
     else:
         return loss
Example #26
 def __call__(self, *args, beta=1.0):
     assert len(args) >= 2
     x = args[:-1]
     t = args[-1]
     mu_e, ln_var_e = self.predictor.encode(*x)
     batchsize = len(mu_e.data)
     rec_loss = 0
     for l in six.moves.range(self.k):
         z = F.gaussian(mu_e, ln_var_e)
         mu_d, ln_var_d = self.predictor.decode(z)
         rec_loss += F.gaussian_nll(t, mu_d, ln_var_d) / (self.k * batchsize)
     kl_loss = beta * F.gaussian_kl_divergence(mu_e, ln_var_e) / batchsize
     self.loss = rec_loss + kl_loss
     reporter_module.report({'loss': self.loss}, self)
     return self.loss
Example #27
File: vfm.py  Project: jilljenn/vfm
    def term_feat(self, iloc, jloc, ival, jval, bs, nf, train=True):
        # Change all of the shapes to form interaction vectors
        shape = (bs, nf * 2, self.n_dim)
        feat_mu_vec = F.broadcast_to(self.feat_mu_vec.b, shape)
        feat_lv_vec = F.broadcast_to(self.feat_lv_vec.b, shape)
        if not train:
            feat_lv_vec += self.lv_floor

        # Construct the interaction mean and variance
        # iloc is (bs, nf), feat(iloc) is (bs, nf, ndim) and
        # dot(feat, feat) is (bs, nf)
        ivec = F.gaussian(feat_mu_vec + self.feat_delta_mu(iloc),
                          feat_lv_vec + self.feat_delta_lv(iloc))
        jvec = F.gaussian(feat_mu_vec + self.feat_delta_mu(jloc),
                          feat_lv_vec + self.feat_delta_lv(jloc))
        # feat is (bs, )
        feat = dot(F.sum(ivec * jvec, axis=2), ival * jval)

        # Compute the KLD for the group mean vector and variance vector
        kld1 = F.gaussian_kl_divergence(self.feat_mu_vec.b, self.feat_lv_vec.b)
        # Compute the KLD for vector deviations from the group mean and var
        kld2 = F.gaussian_kl_divergence(self.feat_delta_mu.W,
                                        self.feat_delta_lv.W)
        return feat, kld1 + kld2
Example #28
def gaussian_kl_divergence():
    mu_data = np.array([1, 2, 3], dtype=np.float32)
    mu = Variable(mu_data)
    var_data = np.array([1, 4, 9], dtype=np.float32)
    var = Variable(var_data)

    ln_var = F.log(var)

    dim = len(mu_data)
    xp = cuda.get_array_module(var)
    expected_kld = (xp.trace(xp.diag(var)) + mu_data.dot(mu_data) - dim -
                    xp.sum(ln_var)) * 0.5
    computed_kld = F.gaussian_kl_divergence(mu, ln_var)

    print('expected_kld: ', expected_kld)
    print('computed_kld: ', computed_kld)
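For reference, the closed form that Example #28 checks is KL(N(mu, diag(var)) || N(0, I)) = 0.5 * sum(mu^2 + var - ln(var) - 1). Below is a minimal, self-contained restatement of the same check, with the expected value computed directly in NumPy (the array values are illustrative only):

import numpy as np
import chainer.functions as F

mu = np.array([1, 2, 3], dtype=np.float32)
var = np.array([1, 4, 9], dtype=np.float32)
ln_var = np.log(var)

# closed-form KL divergence to the standard normal prior
expected_kld = 0.5 * np.sum(mu ** 2 + var - ln_var - 1)
# F.gaussian_kl_divergence sums over all elements by default (reduce='sum')
computed_kld = F.gaussian_kl_divergence(mu, ln_var)

print('expected_kld:', expected_kld)
print('computed_kld:', computed_kld.data)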
Example #29
    def __call__(self, x):
        mu, ln_var = self.encoder(x)
        batchsize = len(mu.array)
        kl_penalty = F.gaussian_kl_divergence(mean=mu,
                                              ln_var=ln_var) / batchsize
        reconstr = 0
        for l in range(self.k):
            z = F.gaussian(mu, ln_var)
            recon = self.decoder(z)
            reconstr += 0.5 * F.mean_squared_error(
                recon, x) * x.shape[2] * x.shape[3] * x.shape[4] / self.k

        loss = (reconstr + self.beta * kl_penalty)
        reporter.report({'loss': loss}, self)
        reporter.report({'reconstr': reconstr}, self)
        reporter.report({'kl_penalty': kl_penalty}, self)
        return loss
Example #30
    def _train_vae(self, batch):
        status = {}

        (s, a, _, _, _) = batch
        reconstructed_action, mean, ln_var = self._vae((s, a))
        reconstruction_loss = F.mean_squared_error(reconstructed_action, a)
        latent_loss = 0.5 * \
            F.gaussian_kl_divergence(mean, ln_var, reduce='mean')
        vae_loss = reconstruction_loss + latent_loss

        self._vae_optimizer.target.cleargrads()
        vae_loss.backward()
        vae_loss.unchain_backward()
        self._vae_optimizer.update()

        xp = chainer.backend.get_array_module(vae_loss)
        status['vae_loss'] = xp.array(vae_loss.array)
        return status
Example #31
 def free_energy_onestep(self):
     """ 
     [input] 
     x    :  BxHxW[mono] Bx3HW[color] matrix (Variable) 
     errx :  BxHxW[mono] Bx3HW[color] matrix (Variable) 
     """ 
     self.c,self.h,enc_mu,enc_logsig2 = self.encode(self.c,self.h,self.x,self.errx,self.h2) 
     kl = F.gaussian_kl_divergence(enc_mu,enc_logsig2) 
     z = F.gaussian(enc_mu,enc_logsig2)
     z = enc_mu 
     self.c2,self.h2,inc_canvas = self.decode(self.c2,self.h2,z) 
      
     self.canvas += inc_canvas 
     y = F.sigmoid(self.canvas) 
     #y = F.relu(self.canvas+0.5)-F.relu(self.canvas-0.5)
     self.errx = self.x-y 
     self.t += 1         
     return y,kl 
Example #32
    def evaluate(self):
        iterator = self._iterators['main']
        model = self._targets['model']
        encoder = model.encoder
        decoder = model.decoder
        k = model.k
        beta = model.beta

        if hasattr(iterator, 'reset'):
            iterator.reset()
            it = iterator
        else:
            it = copy.copy(iterator)

        summary = reporter.DictSummary()

        for batch in it:
            observation = {}
            with reporter.report_scope(observation):
                x = self.converter(batch, self.device)
                with chainer.using_config('train',
                                          False), chainer.using_config(
                                              'enable_backprop', False):
                    mu, ln_var = encoder(x)
                    batchsize = len(mu.array)
                    kl_penalty = F.gaussian_kl_divergence(
                        mean=mu, ln_var=ln_var) / batchsize
                    reconstr = 0
                    for l in range(k):
                        z = F.gaussian(mu, ln_var)
                        recon = decoder(z)
                        reconstr += 0.5 * F.mean_squared_error(
                            recon,
                            x) * x.shape[2] * x.shape[3] * x.shape[4] / k

                    loss = (reconstr + beta * kl_penalty)

                observation['validation/loss'] = loss
                observation['validation/reconstr'] = reconstr
                observation['validation/kl_penalty'] = kl_penalty

            summary.add(observation)

        return summary.compute_mean()
Example #33
 def calcLoss(self, t, mu, ln_var):
     k = self.sample_size
     kl_zero_epoch = self.kl_zero_epoch
     loss = None
     t_pred = [t_e[1:] + [2] for t_e in t]
     t_pred = [xp.asarray(tp_e, dtype=xp.int32) for tp_e in t_pred]
     t = self.denoiseInput(t)
     print("t:{}".format([self.vocab.itos(t_e) for t_e in t[0]]))
     t_vec = self.makeEmbedBatch(t)
     for l in range(k):
         z = F.gaussian(mu, ln_var)
         if loss is None:
             loss = self.decode(z, t_vec, t_pred) / (k * self.batch_size)
         elif loss is not None:
             loss += self.decode(z, t_vec, t_pred) / (k * self.batch_size)
     C = 0.06 * (self.epoch_now - kl_zero_epoch) / self.epoch
     if self.epoch_now > kl_zero_epoch:
         loss += C * F.gaussian_kl_divergence(mu, ln_var) / self.batch_size
     return loss
Example #34
    def lf(self, x):
        """AutoEncoder"""
        mu, ln_var = self.encode(x)
        # reconstruction loss
        z = F.gaussian(mu, ln_var)
        outputs_mu, outputs_sigma_2 = self.decode(z)

        m_vae_loss = (F.flatten(x) - F.flatten(outputs_mu))**2 \
            / F.flatten(outputs_sigma_2)
        m_vae_loss = 0.5 * F.sum(m_vae_loss)

        a_vae_loss = F.log(2 * 3.14 * F.flatten(outputs_sigma_2))
        a_vae_loss = 0.5 * F.sum(a_vae_loss)

        d_vae_loss = F.gaussian_kl_divergence(mu, ln_var)

        self.loss = F.mean(d_vae_loss + m_vae_loss + a_vae_loss)

        return self.loss
Example #35
    def free_energy_onestep(self):  #,h2,aa,bb):
        """
        [input]
        x    :  BxHxW[mono] 3BxHxW[color] matrix (Variable)
        errx :  BxHxW[mono] 3BxHxW[color] matrix (Variable)
        
        """

        B = self.B
        C = self.C
        rP = self.Read_patch
        wP = self.Write_patch

        x_patch = self.R_filter.Filter(self.x)
        #print("x_patch max",np.max(x_patch.data))
        errx_patch = self.R_filter.Filter(self.errx)
        #reshape 3BxHxW -> Bx3HW array
        x_patch_2D = F.reshape(x_patch, (B, C * rP**2))
        errx_patch_2D = F.reshape(errx_patch, (B, C * rP**2))

        self.c, self.h, enc_mu, enc_logsig2 = self.encode(
            self.c, self.h, x_patch_2D, errx_patch_2D, self.h2)
        kl = F.gaussian_kl_divergence(enc_mu, enc_logsig2)
        z = F.gaussian(enc_mu, enc_logsig2)

        self.c2, self.h2, inc_canvas, Wmean_x, Wmean_y, Wln_var, Wln_stride, Wln_gamma, Rmean_x, Rmean_y, Rln_var, Rln_stride, Rln_gamma = self.decode(
            self.c2, self.h2, z)  #,aa,bb)
        self.W_filter.mkFilter(Wmean_x, Wmean_y, Wln_var, Wln_stride,
                               Wln_gamma)
        self.R_filter.mkFilter(Rmean_x, Rmean_y, Rln_var, Rln_stride,
                               Rln_gamma)
        inc_canvas = F.reshape(inc_canvas, (B * C, wP, wP))
        #print("Wfilter:",np.max(self.W_filter.Fx.data),np.min(self.W_filter.Fx.data),np.max(self.W_filter.Fy.data),np.min(self.W_filter.Fy.data))
        #print("Wmean:{} {}, Wlnvar:{}, Wln_stride:{}, Wln_gamma:{}".format(Wmean_x.data,Wmean_y.data,Wln_var.data,Wln_stride.data,Wln_gamma.data))
        inc_canvas = self.W_filter.InvFilter(inc_canvas)
        self.canvas += inc_canvas
        y = F.sigmoid(
            self.canvas
        )  #F.relu(self.canvas+0.5)-F.relu(self.canvas-0.5) #[normal]:sigmoid, [whitened]:tanh
        self.errx = self.x - y
        self.t += 1
        return y, kl  #,h2
Example #36
    def calcLoss(self, t, categ_vec_h, categ_vec_c, mu, ln_var, wei_arr=None):
        k = self.sample_size
        loss = None
        t_pred = [t_e[1:] + [2] for t_e in t]
        t_pred = [xp.asarray(tp_e, dtype=xp.int32) for tp_e in t_pred]
        t = self.denoiseInput(t)
        t_vec = self.makeEmbedBatch(t)

        for l in range(k):
            z = F.gaussian(mu, ln_var)
            if loss is None:
                loss = self.decode(z, categ_vec_h, categ_vec_c, t_vec, t_pred,
                                   wei_arr) / (k * self.batch_size)
            elif loss is not None:
                loss += self.decode(z, categ_vec_h, categ_vec_c, t_vec, t_pred,
                                    wei_arr) / (k * self.batch_size)
        C = 0.005 * (self.epoch_now - self.kl_zero_epoch) / self.epoch  # 0.02
        if self.epoch_now > self.kl_zero_epoch:
            loss += C * F.gaussian_kl_divergence(mu, ln_var) / self.batch_size
        return loss
Example #37
def train_one(enc, gen, dis, optimizer_enc, optimizer_gen, optimizer_dis, x_batch, gpu_device):
    batch_size = len(x_batch)
    if gpu_device == None:
        xp = np
    else:
        xp = cuda.cupy
    # encode
    x_in = xp.asarray(x_batch)
    z0, mean, var = enc(Variable(x_in))
    x0 = gen(z0)
    y0, l0 = dis(x0)
    loss_enc = F.gaussian_kl_divergence(mean, var) / float(l0.data.size)
    loss_gen = 0
    loss_gen = F.softmax_cross_entropy(y0, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis = F.softmax_cross_entropy(y0, Variable(xp.ones(batch_size).astype(np.int32)))
    # train generator
    z1 = Variable(xp.random.normal(0, 1, (batch_size, latent_size)).astype(np.float32))
    x1 = gen(z1)
    y1, l1 = dis(x1)
    loss_gen += F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis += F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size).astype(np.int32)))
    # train discriminator
    y2, l2 = dis(Variable(xp.asarray(x_batch)))
    loss_enc += F.mean_squared_error(l0, l2)
    loss_gen += 0.1 * F.mean_squared_error(l0, l2)
    loss_dis += F.softmax_cross_entropy(y2, Variable(xp.zeros(batch_size).astype(np.int32)))

    optimizer_enc.zero_grads()
    loss_enc.backward()
    optimizer_enc.update()

    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()

    optimizer_dis.zero_grads()
    loss_dis.backward()
    optimizer_dis.update()

    return (float(loss_enc.data), float(loss_gen.data), float(loss_dis.data))
Example #38
    ## loss buffers
    sum_enc_loss = 0.
    sum_dec_loss = 0.
    sum_dis_loss = 0.
    sum_gan_loss = 0.
    sum_like_loss = 0.
    sum_prior_loss = 0.
    ## mini-batch training loop
    for i in six.moves.range(0, N_train, batchsize):
        x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))  # extract this batch of data
        ##### Forward pass and loss computation
        # KL divergence
        mu, ln_var = encode(x, test=False)
        x_rec = decode(mu, sigmoid=True)
        batchsize = len(mu.data)
        kl_loss = F.gaussian_kl_divergence(mu, ln_var) / batchsize

        # generate a random z and decode it  ## z is sampled from N(0, 1)
        z_p = xp.random.standard_normal(mu.data.shape).astype('float32')
        z_p = chainer.Variable(z_p)
        x_p = decode(z_p)

        # get the discriminator outputs
        d_x_rec, h_out_rec = disc(x_rec)
        d_x_base, h_out_base = disc(x)
        d_x_p, h_out_p = disc(x_p)
        # softmax cross entropy losses for the discriminator outputs
        L_rec = F.softmax_cross_entropy(d_x_rec, Variable(xp.zeros(batchsize, dtype=np.int32)))
        L_base = F.softmax_cross_entropy(d_x_base, Variable(xp.ones(batchsize, dtype=np.int32)))
        L_p = F.softmax_cross_entropy(d_x_p, Variable(xp.zeros(batchsize, dtype=np.int32)))
Example #39
 def check_invalid_option(self, xp):
     m = chainer.Variable(xp.asarray(self.mean))
     v = chainer.Variable(xp.asarray(self.ln_var))
     with self.assertRaises(ValueError):
         F.gaussian_kl_divergence(m, v, 'invalid_option')
Example #40
 def check_gaussian_kl_divergence(self, mean, ln_var):
     m = chainer.Variable(mean)
     v = chainer.Variable(ln_var)
     actual = cuda.to_cpu(F.gaussian_kl_divergence(m, v, self.reduce).data)
     testing.assert_allclose(self.expect, actual)
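Examples #39 and #40 exercise the reduce argument of F.gaussian_kl_divergence. A small sketch of how the option changes the output (the zero-valued inputs are illustrative; with zero mean and unit variance the KL is exactly zero):

import numpy as np
import chainer.functions as F

mean = np.zeros((2, 3), dtype=np.float32)
ln_var = np.zeros((2, 3), dtype=np.float32)  # ln(1) = 0, i.e. unit variance

kl_sum = F.gaussian_kl_divergence(mean, ln_var)              # default reduce='sum': a scalar
kl_no = F.gaussian_kl_divergence(mean, ln_var, reduce='no')  # element-wise, same shape as the inputs

print(kl_sum.shape, kl_no.shape)  # () and (2, 3)
# Any other value of reduce raises ValueError, which is what check_invalid_option in Example #39 asserts.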