Example #1
    def get_loss_func(self, x, C=1.0):
        mu, ln_var = self.encode(x)
        batchsize = len(mu)
        for iii in range(self.sampling_number):
            # One reparameterized sample z ~ N(mu, exp(ln_var)) per pass.
            z = F.gaussian(mu, ln_var)
            rec_loss = F.sum(
                F.bernoulli_nll(x, self.decode(z, sigmoid=False), reduce='no'),
                axis=1) / batchsize
            tmp_loss = rec_loss + F.sum(
                C * gaussian_kl_divergence(mu, ln_var, reduce='no'),
                axis=1) / batchsize
            tmp_loss = F.reshape(tmp_loss, [batchsize, 1])
            if iii == 0:
                loss = tmp_loss
            else:
                loss = F.concat((loss, tmp_loss), axis=1)
        # Weight the per-sample losses by a softmax over the sample axis.
        importance_weight = F.softmax(loss)
        self.total_loss = F.sum(importance_weight * loss)
        return self.total_loss
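All of the examples on this page hinge on the same contract: F.gaussian(mean, ln_var) draws one reparameterized sample mean + exp(ln_var / 2) * eps with eps ~ N(0, I), where ln_var is the log of the variance (not of the standard deviation), so gradients flow through both arguments. A minimal self-contained sketch of that behaviour (shapes and values are arbitrary):

import numpy as np
import chainer.functions as F

mu = np.zeros((4, 8), dtype=np.float32)      # per-dimension means
ln_var = np.zeros((4, 8), dtype=np.float32)  # log-variances; exp(0) = 1, i.e. unit variance
z = F.gaussian(mu, ln_var)                   # Variable holding one sample from N(mu, exp(ln_var))
print(z.shape)                               # (4, 8)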
Example #2
	def encode_x_z(self, x, test=False, argmax_y=True):
		x = self.to_variable(x)
		mean, ln_var = self.q_a_x(x, test=test)
		a = F.gaussian(mean, ln_var)
		y = self.sample_x_y(x, argmax=argmax_y, test=test)
		mean, ln_var = self.q_z_axy(a, x, y, test=test)
		return F.gaussian(mean, ln_var)
    def plot_all_imgs(self, index=0):
        f, ax = plt.subplots(2, 3)  # three columns are needed: ax[., 2] is used below

        img = self.data[index, 0, :, :]
        img2 = self.data[index, 1, :, :]

        ax[0, 0].imshow(np.reshape(img, (32, 32)))
        ax[0, 0].set_title('Original')
        ax[1, 0].imshow(np.reshape(img2, (32, 32)))
        ax[1, 0].set_title('Original2')

        m, s, m2, s2 = self.autoencoder.encode(
            np.reshape(img, (1, 1, 32, 32)), np.reshape(img2, (1, 1, 32, 32)))
        sample1 = F.gaussian(m, s)
        sample2 = F.gaussian(m2, s2)
        sample = F.concat((sample1, sample2))
        mean = F.concat((m, m2))

        # Reconstruct using sample given m,s
        decoding = np.reshape(
            self.autoencoder.decode(sample, for_plot=False).data, (32, 32))
        give_stats(decoding, 'Decoding')
        im = ax[0, 2].imshow(decoding)
        ax[0, 2].set_title('Reconstruct with sampling')
        data_utils.colorbar(im)

        decoding = np.reshape(
            self.autoencoder.decode(sample, for_plot=True).data, (32, 32))
        give_stats(decoding, 'Decoding Sig')
        im = ax[1, 2].imshow(decoding)
        ax[1, 2].set_title('Sig(Reconstruct with sampling)')
        data_utils.colorbar(im)
Example #4
 def encode_x_z(self, x, test=False, argmax_y=True):
     x = self.to_variable(x)
     mean, ln_var = self.q_a_x(x, test=test)
     a = F.gaussian(mean, ln_var)
     y = self.sample_x_y(x, argmax=argmax_y, test=test)
     mean, ln_var = self.q_z_axy(a, x, y, test=test)
     return F.gaussian(mean, ln_var)
Example #5
    def reverse_step(self, out, gaussian_eps, squeeze_factor, sampling=True):
        sum_logdet = 0
        if self.split_output:
            if sampling:
                z_distribution = self.prior(out)
                mean, ln_var = split_channel(z_distribution)
                zi = cf.gaussian(mean, ln_var, eps=gaussian_eps)
            else:
                zi = gaussian_eps
            out = cf.concat((zi, out), axis=1)
        else:
            if sampling:
                zeros = zeros_like(gaussian_eps)
                z_distribution = self.prior(zeros)
                mean, ln_var = split_channel(z_distribution)
                out = cf.gaussian(mean, ln_var, eps=gaussian_eps)
            else:
                out = gaussian_eps

        for flow in self.flows[::-1]:
            out, logdet = flow.reverse_step(out)
            sum_logdet += logdet

        out = unsqueeze(out, factor=squeeze_factor)

        return out, sum_logdet
Example #6
	def plot_all_imgs(self,index=0):
		'''
		Plotting procedure for the target, the pre-sigmoid reconstruction and the reconstruction.
		index: index of the image in the data matrix (because of seeding, use the same idx to get the same img)
		'''
		f,ax = plt.subplots(1,3)

		img = self.data[index,0,:,:]
		img2 = self.data[index,1,:,:]
		target = self.data[index,2,:,:]

		ax[0].imshow(np.reshape(target,(32,32)),interpolation="nearest")
		ax[0].set_title('Target')

		m,s,m2,s2 = self.autoencoder.encode(np.reshape(img,(1,1,32,32)),np.reshape(img2,(1,1,32,32)))
		sample1 = F.gaussian(m, s)
		sample2 = F.gaussian(m2,s2)
		sample = F.concat((sample1,sample2))

		# Reconstruct using sample given m,s
		decoding = np.reshape(self.autoencoder.decode(sample,for_plot=False).data,(32,32))
		give_stats(decoding,'Decoding')
		im = ax[1].imshow(decoding, cmap=data_utils.shiftedColorMap(matplotlib.cm.jet, midpoint=data_utils.calcMidpointForCM(decoding), name='shifted'),interpolation="nearest")
		ax[1].set_title('Reconstruction with sampling')
		data_utils.colorbar(im)

		result = np.reshape(self.autoencoder.decode(sample,for_plot=True).data,(32,32))
		give_stats(result,'Decoding Sig')
		im = ax[2].imshow(result,interpolation="nearest")
		ax[2].set_title('Sig(Reconstruction with sampling)')
		data_utils.colorbar(im)

		# Plot MSE in title
		MSE = chainer.functions.mean_squared_error(target, result).data
		f.suptitle("MSE of {:02.10f}".format(float(MSE)))
Example #7
 def __call__(self,x,seed):
     if (seed not in self.calledValues):
         w = F.gaussian(self.muW,self.lnSigmaW)
         b = F.gaussian(self.muB,self.lnSigmaB)
         self.calledValues[seed] = (w,b)
     else:
         w,b = self.calledValues[seed]
     return F.linear(x,w,b)
 def _check(self):
     eps = self.eps if self.specify_eps else None
     out, out_eps = functions.gaussian(
         self.m, self.v, eps=eps, return_eps=True)
     assert isinstance(out_eps, type(out.array))
     if eps is None:
         assert out_eps.shape == out.array.shape
     else:
         assert out_eps is eps
     out2 = functions.gaussian(self.m, self.v, eps=out_eps)
     testing.assert_allclose(out.array, out2.array)
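The test above exercises the eps / return_eps keywords: the noise used for one sample can be retrieved and fed back in to reproduce the sample exactly. A minimal usage sketch, assuming a Chainer version that supports these keyword arguments (as the test does):

import numpy as np
import chainer.functions as F

m = np.zeros((2, 3), dtype=np.float32)
v = np.zeros((2, 3), dtype=np.float32)

z1, eps = F.gaussian(m, v, return_eps=True)  # also return the standard-normal noise
z2 = F.gaussian(m, v, eps=eps)               # reuse the same noise
assert np.allclose(z1.array, z2.array)       # identical samples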
Example #10
 def generate_latent(self, batch, center=None, sd=1.0):
     zeros = broadcast_to(self.zero, (batch, self.z_size))
     ones = broadcast_to(self.one, (batch, self.z_size))
     zeros.unchain_backward()
     ones.unchain_backward()
     ln_var = log(sd**2) * ones
     if center is None:
         return gaussian(zeros, ln_var)
     else:
         mean_z = broadcast_to(center, (batch, self.z_size))
         return gaussian(mean_z, ln_var)
Example #11
    def pretrain_step_vrae_tag(self,
                               x_input,
                               tag,
                               word_drop_ratio=0.0,
                               train=True):
        """
        Maximum likelihood Estimation

        :param x_input:
        :return: loss
        """
        batch_size = len(x_input)
        _, mu_z, ln_var_z = self.encoder.encode_with_tag(x_input, tag, train)

        self.reset_state()

        if self.latent_dim:
            z = F.gaussian(mu_z, ln_var_z)
        else:
            latent = F.gaussian(mu_z, ln_var_z)
            tag_ = self.tag_embed(
                chainer.Variable(self.xp.array(tag, 'int32'),
                                 volatile=not train))
            self.lstm1.h = self.dec_input(F.concat((latent, tag_)))
            z = None

        accum_loss = 0
        for i in range(self.sequence_length):
            if i == 0:
                x = chainer.Variable(self.xp.asanyarray([self.start_token] *
                                                        batch_size, 'int32'),
                                     volatile=not train)
            else:
                if np.random.random() < word_drop_ratio and train:
                    x = chainer.Variable(self.xp.asanyarray(
                        [self.start_token] * batch_size, 'int32'),
                                         volatile=not train)
                else:
                    x = chainer.Variable(self.xp.asanyarray(
                        x_input[:, i - 1], 'int32'),
                                         volatile=not train)

            scores = self.decode_one_step(x, z=z)
            loss = F.softmax_cross_entropy(
                scores,
                chainer.Variable(self.xp.asanyarray(x_input[:, i], 'int32'),
                                 volatile=not train))
            accum_loss += loss

        dec_loss = accum_loss
        kl_loss = F.gaussian_kl_divergence(mu_z, ln_var_z) / batch_size
        return dec_loss, kl_loss
Example #12
 def generate(self,N,sampling_x=False):
     z_dim = self['dec_l1'].W.shape[1]
     if(isinstance(self['dec_l1'].W,numpy.ndarray)):
         zero_mat = Variable(numpy.zeros((N,z_dim),'float32'))
         z = F.gaussian(zero_mat,zero_mat)
     else:
         raise NotImplementedError()
     dec_mu, dec_log_sigma_2 = self.decode(z)
     if(sampling_x):
         x = F.gaussian(dec_mu,dec_log_sigma_2)
     else:
         x = dec_mu
     return x
    def __call__(self, x, top_down=None):
        if top_down is not None:
            x = self.xp.concatenate((x, top_down))  # concatenate expects a sequence of arrays
        (z_mu, z_var, state) = self.encoder(x)
        z = F.gaussian(z_mu, z_var)
        (x_mu, x_var) = self.decoder(z)

        (a_mu, a_var) = self.action(z, state)
        a = F.gaussian(a_mu, a_var)
        self.a = a
        self.x_hat = x_mu

        return a, (x_mu, x_var), (z_mu, z_var), (a_mu, a_var)
 def generate(self, N, sampling_x=False):
     z_dim = self['dec_l1'].W.shape[1]
     if (isinstance(self['dec_l1'].W, np.ndarray)):
         zero_mat = Variable(np.zeros((N, z_dim), 'float32'))
         z = F.gaussian(zero_mat, zero_mat)
     else:
         raise NotImplementedError()
     dec_mu, dec_log_sigma_2 = self.decode(z)
     if (sampling_x):
         x = F.gaussian(dec_mu, dec_log_sigma_2)
     else:
         x = dec_mu
     return x
Example #15
    def forward_down(self, x, sample=False):
        """
        """
        h = F.elu(x)
        h = self.down1(h)
        sections = [self.z_dim, self.z_dim*2, self.z_dim*3,
                    self.z_dim*4, self.z_dim*4+self.h_dim]
        pz_mean, pz_logv, rz_mean, rz_logv, down_context, h_det = \
            F.split_axis(h, sections, axis=1)

        prior = F.gaussian(pz_mean, 2 * pz_logv)
        logps = self.gaussian_diag_logps(pz_mean, 2*pz_logv, prior)

        if sample:
            z = prior
            context = 0
            logqs = chainer.Variable(
                self.xp.zeros(logps.shape, dtype="float32"), name="logqs")
        else:
            post_mean = rz_mean + self.qz_mean
            post_logv = 2 * (rz_logv + self.qz_logv)
            posterior = F.gaussian(post_mean, post_logv)
            context = self.up_context + down_context
            logqs = self.gaussian_diag_logps(post_mean, post_logv, posterior)

            z = posterior

        # autoregressive nn
        h = self.ar1(z)
        h = h + context
        h = self.ar2(h)
        sections = [self.z_dim]
        arw_mean, arw_logv = F.split_axis(h, sections, axis=1)
        # arw_mean, arw_logv = h[0] * 0.1, h[1] * 0.1  # ??
        z = (z - 0.1*arw_mean) / F.exp(F.clip(0.1*arw_logv, -100., 100.))
        logqs += arw_logv

        kl_cost = logqs - logps
        kl_cost, kl_obj = self.kl_sum(kl_cost)

        z = F.concat([z, h_det])
        z = F.elu(z)
        z = self.down2(z)
        if self.downsample:
            output_shape = z.shape[2:]
            x = F.resize_images(x, output_shape)

        z = x + 0.1 * z
        return z, kl_obj, kl_cost
Example #16
    def generate_canvas_states(self, v, r, xp):
        batch_size = v.shape[0]
        h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
            batch_size, xp)

        v = cf.reshape(v, v.shape[:2] + (1, 1))

        u_t_array = []

        for t in range(self.num_layers):
            generation_core = self.get_generation_core(t)

            mean_z_p, ln_var_z_p = self.z_prior_distribution.compute_parameter(
                h_t_gen)
            z_t = cf.gaussian(mean_z_p, ln_var_z_p)

            h_next_gen, c_next_gen, u_next = generation_core(
                h_t_gen, c_t_gen, z_t, v, r, u_t)

            u_t = u_next
            h_t_gen = h_next_gen
            c_t_gen = c_next_gen

            u_t_array.append(u_t)

        return u_t_array
Example #17
	def __call__(self, x, test=False):
		if test == True:
			return x
		xp = cuda.get_array_module(x.data)
		ln_var = math.log(self.std ** 2)
		noise = F.gaussian(Variable(xp.zeros_like(x.data)), Variable(xp.full_like(x.data, ln_var)))
		return x + noise
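Example #17 and the similar noise layers further down implement additive Gaussian noise by calling F.gaussian with a zero mean and a constant ln_var = log(std ** 2). A standalone sketch of that pattern, with a hypothetical helper name and NumPy inputs assumed:

import math
import numpy as np
import chainer.functions as F

def add_gaussian_noise(x, std=0.3):
    # hypothetical helper: draws N(0, std^2) noise of x's shape and adds it
    ln_var_value = math.log(std ** 2)
    noise = F.gaussian(np.zeros_like(x), np.full_like(x, ln_var_value))
    return noise + x  # Variable + ndarray -> Variable

x = np.random.rand(2, 5).astype(np.float32)
y = add_gaussian_noise(x)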
Example #18
	def __call__(self, x):
		if chainer.config.train == False:
			return x
		xp = cuda.get_array_module(x.data)
		ln_var = math.log(self.std ** 2)
		noise = functions.gaussian(chainer.Variable(xp.zeros_like(x.data)), chainer.Variable(xp.full_like(x.data, ln_var)))
		return x + noise
    def train(self, x, L=1, test=False):
        batchsize = x.data.shape[0]
        z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False)
        loss = 0
        for l in xrange(L):
            # Sample z
            z = F.gaussian(z_mean, z_ln_var)

            # Compute lower bound
            log_px_z = self.log_px_z(x, z, test=test)
            log_pz = self.log_pz(z, z_mean, z_ln_var)
            log_qz_x = self.log_qz_x(z, z_mean, z_ln_var)
            lower_bound = log_px_z + log_pz - log_qz_x

            loss += -lower_bound

        loss = F.sum(loss) / L / batchsize

        self.zero_grads()
        loss.backward()
        self.update()

        if self.gpu:
            loss.to_cpu()
        return loss.data
Example #20
def _infer_z(mu, ln_var):
    batch_size = mu.data.shape[0]
    var = F.exp(ln_var)
    z = F.gaussian(mu, ln_var)
    kl = -F.sum(1 + ln_var - mu**2 - var) / 2
    kl /= batch_size
    return z, kl
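The closed-form KL in _infer_z is the standard KL(N(mu, var) || N(0, I)) term, which Chainer also ships as F.gaussian_kl_divergence, so the hand-written sum can be cross-checked against the built-in. A small equivalence sketch (random inputs, arbitrary tolerance):

import numpy as np
import chainer.functions as F

mu = np.random.randn(3, 4).astype(np.float32)
ln_var = np.random.randn(3, 4).astype(np.float32)

kl_manual = -F.sum(1 + ln_var - mu ** 2 - np.exp(ln_var)) / 2
kl_builtin = F.gaussian_kl_divergence(mu, ln_var)  # reduce='sum' by default
assert np.allclose(kl_manual.array, kl_builtin.array, atol=1e-4)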
Example #21
    def update_core(self):
        batch = self._iterators['main'].next()
        x = Variable(self.converter(batch, self.device))
        xp = cuda.get_array_module(x.data)

        enc = self.enc
        opt_enc = self._optimizers['enc']
        dec = self.dec
        opt_dec = self._optimizers['dec']

        mu, ln_var = enc(x)

        batchsize = len(mu.data)
        rec_loss = 0
        k = 10
        for l in range(k):
            z = F.gaussian(mu, ln_var)
            rec_loss += F.bernoulli_nll(x, dec(
                z, sigmoid=False)) / (k * batchsize)

        loss = rec_loss + 1.0 * F.gaussian_kl_divergence(mu,
                                                         ln_var) / batchsize

        enc.cleargrads()
        dec.cleargrads()
        loss.backward()
        opt_enc.update()
        opt_dec.update()

        chainer.report({'rec_loss': rec_loss})
        chainer.report({'loss': loss})
Example #22
 def lf(frames):
     mu, ln_var = self.encode(frames)
     z = F.gaussian(mu, ln_var)
     frames_flat = F.reshape(
         frames,
         (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
     variational_flat = F.reshape(
         self.decode(z),
         (-1, frames.shape[1] * frames.shape[2] * frames.shape[3]))
     rec_loss = F.sum(F.square(frames_flat - variational_flat),
                      axis=1)  # l2 reconstruction loss
     rec_loss = F.mean(rec_loss)
     kl_loss = F.sum(F.gaussian_kl_divergence(mu, ln_var, reduce="no"),
                     axis=1)
     if self._cpu:
         kl_tolerance = np.asarray(self.kl_tolerance *
                                   self.n_latent).astype(np.float32)
     else:
         kl_tolerance = cp.asarray(self.kl_tolerance *
                                   self.n_latent).astype(cp.float32)
     kl_loss = F.maximum(kl_loss,
                         F.broadcast_to(kl_tolerance, kl_loss.shape))
     kl_loss = F.mean(kl_loss)
     loss = rec_loss + kl_loss
     chainer.report({'loss': loss}, observer=self)
     chainer.report({'kl_loss': kl_loss}, observer=self)
     chainer.report({'rec_loss': rec_loss}, observer=self)
     return loss
Example #23
 def sample_with_log_prob(self):
     x = F.gaussian(self.mean, self.ln_var)
     normal_log_prob = _eltwise_gaussian_log_likelihood(
         x, self.mean, self.var, self.ln_var)
     log_probs = normal_log_prob - _tanh_forward_log_det_jacobian(x)
     y = F.tanh(x)
     return y, F.sum(log_probs, axis=1)
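Example #23 calls two private helpers that are not shown here. As a reference, a hedged sketch of what such helpers typically compute in tanh-squashed Gaussian policies; the names and signatures below are hypothetical stand-ins, not the original functions:

import math
import chainer.functions as F

def eltwise_gaussian_log_likelihood(x, mean, var, ln_var):
    # elementwise log N(x; mean, var)
    return -0.5 * (math.log(2 * math.pi) + ln_var + (x - mean) ** 2 / var)

def tanh_forward_log_det_jacobian(x):
    # log |d tanh(x)/dx| = log(1 - tanh(x)^2), written in a numerically stable form
    return 2. * (math.log(2.) - x - F.softplus(-2. * x))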
Example #24
    def reconst(self, data, unregular=False):
        if data.ndim == 1:
            data = data.reshape(1,-1)

        with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
            e_mu,e_var = self.encode(Variable(data))
            feat = F.gaussian(e_mu, e_var).data
            d_out = self.decode(Variable(feat))

        if self.is_gauss_dist:
            rec = d_out[0].data
            if unregular:  # the non-regularization term
                d_mu, d_var = d_out
                D_VAE = F.gaussian_kl_divergence(e_mu, e_var)
                A_VAE = 0.5* ( np.log(2*np.pi) + d_var )
                M_VAE = 0.5* ( data-d_mu )**2 * F.exp(-d_var)
                return feat, rec, M_VAE.data
        else:
            rec = F.sigmoid(d_out).data
        mse = np.mean( (rec-data)**2, axis=1 )

        # lat_loss = F.gaussian_kl_divergence(e_mu, e_var)
        # rec_loss = F.bernoulli_nll( Variable(data), d_out )
        # vae_err = (lat_loss+rec_loss).data

        return feat, rec, mse
Example #25
 def _encode(self, s, a):
     mu, ln_var = self._latent_distribution(s, a)
     # 2 * ln_std = ln_var
     # original code is written in ln_std form
     # Clip for numerical stability
     ln_var = F.clip(ln_var, x_min=-8, x_max=30)
     return F.gaussian(mu, ln_var), mu, ln_var
Example #26
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """

        g = F.broadcast_to(
            F.gaussian(
                np.array([0], dtype=np.float32),
                np.array([np.exp(1)], dtype=np.float32)), x.shape)
            
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = \
                                      F.broadcast(
                                          *[x_g_norm,
                                            x_g_y_g,
                                            F.expand_dims(y_g_norm, 1)])
        #F.exp(- (x_g_norm - 2 * x_g_y_g+ y_g_norm))
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #27
    def generate_image(self, v, r, xp):
        batch_size = v.shape[0]
        h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
            batch_size, xp)

        v_broadcast_shape = (
            h_t_gen.shape[0],
            v.shape[1],
        ) + h_t_gen.shape[2:]
        v = xp.reshape(v, v.shape + (1, 1))
        v = xp.broadcast_to(v, shape=v_broadcast_shape)

        for t in range(self.generation_steps):
            generation_core = self.get_generation_core(t)
            generation_prior = self.get_generation_prior(t)
            generation_upsampler = self.get_generation_upsampler(t)

            mean_z_p, ln_var_z_p = generation_prior.compute_parameter(h_t_gen)
            z_t = cf.gaussian(mean_z_p, ln_var_z_p)

            h_next_gen, c_next_gen = generation_core(h_t_gen, c_t_gen, z_t, v,
                                                     r)

            u_t = u_t + generation_upsampler(h_next_gen)
            h_t_gen = h_next_gen
            c_t_gen = c_next_gen

        mean_x = self.map_u_x(u_t)
        return mean_x.data
	def train(self, x, L=1, test=False):
		batchsize = x.data.shape[0]
		z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False)
		loss = 0
		for l in xrange(L):
			# Sample z
			z = F.gaussian(z_mean, z_ln_var)

			# Compute lower bound
			log_px_z = self.log_px_z(x, z, test=test)
			log_pz = self.log_pz(z, z_mean, z_ln_var)
			log_qz_x = self.log_qz_x(z, z_mean, z_ln_var)
			lower_bound = log_px_z + log_pz - log_qz_x

			loss += -lower_bound

		loss = F.sum(loss) / L / batchsize

		self.zero_grads()
		loss.backward()
		self.update()

		if self.gpu:
			loss.to_cpu()
		return loss.data
Example #29
    def generate_canvas_states(self, v, r, xp):
        batch_size = v.shape[0]
        h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
            batch_size, xp)

        v = cf.reshape(v, v.shape[:2] + (1, 1))

        u_t_array = []

        for t in range(self.generation_steps):
            generation_core = self.get_generation_core(t)
            generation_prior = self.get_generation_prior(t)
            generation_upsampler = self.get_generation_upsampler(t)

            mean_z_p, ln_var_z_p = generation_prior.compute_parameter(h_t_gen)
            z_t = cf.gaussian(mean_z_p, ln_var_z_p)

            h_next_gen, c_next_gen = generation_core(h_t_gen, c_t_gen, z_t, v,
                                                     r, u_t)

            u_t = u_t + generation_upsampler(h_next_gen)
            h_t_gen = h_next_gen
            c_t_gen = c_next_gen

            u_t_array.append(u_t)

        return u_t_array
Example #30
        def lf(x):
            mu, ln_var = self.encode(x)
            mean_mu, mean_sigma = calculate_means(mu, ln_var)
            batchsize = len(mu.data)
            std_mu, std_ln_var = generate_std_params(mu)

            # reconstruction loss
            rec_loss = 0
            kl_loss = 0
            for l in six.moves.range(k):
                z = F.gaussian(mu, ln_var)
                rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) / (k * batchsize)
                kl_loss += -F.gaussian_nll(z, mu, ln_var) / (k * batchsize)
                kl_loss += F.gaussian_nll(z, std_mu, std_ln_var) / (k * batchsize)

            self.rec_loss = rec_loss
            self.kl_loss = kl_loss
            self.loss = self.rec_loss + C * self.kl_loss
            chainer.report(
                {
                    'rec_loss': rec_loss,
                    'kl': self.kl_loss,
                    'loss': self.loss,
                    'mu': mean_mu,
                    'sigma': mean_sigma,
                },
                observer=self
            )
            return self.loss
Example #31
def _infer_z(mu, ln_var):
    batch_size = mu.data.shape[0]
    var = F.exp(ln_var)
    z = F.gaussian(mu, ln_var)
    kl = -F.sum(1 + ln_var - mu ** 2 - var) / 2
    kl /= batch_size
    return z, kl
Example #32
    def __call__(self, in_data):
        """in_data: (B, N, C, H, W)"""

        assert in_data.ndim == 5  # BNCHW

        xp = self.xp
        batch_size, nframes, nchannels = in_data.shape[:3]
        in_size = in_data.shape[3:]

        assert nframes == self.num_episodes, "%s != %s" % (self.num_episodes,
                                                           nframes)

        self.reset_state()

        x = resize_seq_images(in_data, (128, 128))

        hidden = self.encoder(x)

        # add gaussian noise
        if chainer.config.train:
            noise_sigma = xp.log(self.noise_sigma**2, dtype=hidden.dtype)
            ln_var = xp.ones_like(hidden, dtype=hidden.dtype) * noise_sigma
            hidden = F.gaussian(hidden, ln_var)

        reconst = self.decoder_reconst(hidden)
        pred = self.decoder_pred(hidden)

        reconst = resize_seq_images(reconst, in_size)
        pred = resize_seq_images(pred, in_size)

        assert reconst.shape == in_data.shape
        assert pred.shape == in_data.shape

        return reconst, pred, hidden
 def generate_onestep(self):  #,h2,aa,bb):
     """
     generate from the middle layer
     # call reset() first; note there is no relation between img_array [input] and the generated image [output]
     [input]
     x    :  BxHxW[mono] 3BxHxW[color] matrix (Variable)
     errx :  BxHxW[mono] 3BxHxW[color] matrix (Variable)
     [output]
     y   :   BxHxW[mono] 3BxHxW[color] matrix (Variable)
             [normal]:sigmoid,relu [whitened]:tanh
     """
     zero_mat = XP.fzeros((self.B, self.z_dim))
     z = F.gaussian(zero_mat, zero_mat)  #F.gaussian(mean,ln_var)
     self.c2, self.h2, inc_canvas, Wmean_x, Wmean_y, Wln_var, Wln_stride, Wln_gamma, Rmean_x, Rmean_y, Rln_var, Rln_stride, Rln_gamma = self.decode(
         self.c2, self.h2, z)  #,aa,bb)
     self.W_filter.mkFilter(Wmean_x, Wmean_y, Wln_var, Wln_stride,
                            Wln_gamma)
     inc_canvas = F.reshape(
         inc_canvas, (self.B * self.C, self.Write_patch, self.Write_patch))
     inc_canvas = self.W_filter.InvFilter(inc_canvas)
     self.canvas += inc_canvas
     y = F.relu(self.canvas + 0.5) - F.relu(
         self.canvas -
         0.5)  #F.sigmoid(self.canvas) #[normal]:sigmoid, [whitened]:tanh
     self.errx = self.x - y
     self.t += 1
     return y  #,h2
 def sample_g0(self, zs):
     mu = self.l_g0_mu(zs)
     ln_var = self.l_g0_ln_var(zs)
     g_0 = F.gaussian(mu, ln_var)
     batchsize = len(mu.data)
     kl_g0 = gaussian_kl_divergence(mu, ln_var) / batchsize
     return g_0, kl_g0
    def __call__(self, z, c, test=False):
        ### text augmentation
        hc_mu = F.leaky_relu(self.lc_mu(c))
        hc_var = F.leaky_relu(self.lc_var(c))
        h_c = F.gaussian(hc_mu, hc_var)
        
        ### concate z and c
        h = F.concat((z, h_c))

        ### generate image
        h1_0 = F.reshape(self.bn0(self.l0(h), test=test), (h.data.shape[0], self.gf_dim*8, self.s16, self.s16))
        h1_1 = self.dc1_1(h1_0, test=test)
        h1_1 = self.dc1_2(h1_1, test=test)
        h1_1 = self.dc1_3(h1_1, test=test)
        h = F.relu(h1_0+h1_1)
        
        h2_0 = self.dc2(h, test=test)
        h2_1 = self.dc2_1(h2_0, test=test)
        h2_1 = self.dc2_2(h2_1, test=test)
        h2_1 = self.dc2_3(h2_1, test=test)  
        h = F.relu(h2_0+h2_1)
        
        h = self.dc3(h, test=test)
        h = self.dc4(h, test=test)

        x = F.tanh(self.dc5(h, test=test))
        if test:
            return x
        else:
            return x, hc_mu, hc_var
Example #36
    def term_slop(self, loc, val, bs, nf, train=True):
        """ Compute the slope for each active feature.
        """
        shape = (bs, nf)

        # Reshape all of our constants
        pr_mu = F.broadcast_to(self.slop_mu.b, shape)
        pr_lv = F.broadcast_to(self.slop_lv.b, shape)
        # This is either zero or a very negative number
        # indicating whether to sample from N(mean, logvar) or just take
        # the mean precisely
        if not train:
            pr_lv += self.lv_floor

        # The feature slopes are grouped together so that they all share a
        # common mean. The individual per-feature deltas (slop_delta_mu,
        # slop_delta_lv) are then shrunk towards zero, which effectively makes
        # features fall back on the group mean.
        sl_mu = F.reshape(self.slop_delta_mu(loc), shape) + pr_mu
        sl_lv = F.reshape(self.slop_delta_lv(loc), shape) + pr_lv
        coef = F.gaussian(sl_mu, sl_lv)
        slop = F.sum(coef * val, axis=1)

        # Calculate divergence between group mean and N(0, 1)
        kld1 = F.gaussian_kl_divergence(self.slop_mu.b, self.slop_lv.b)
        # Calculate divergence of individual delta means and delta vars
        args = (self.slop_delta_mu.W, self.slop_delta_lv.W)
        kld2 = F.gaussian_kl_divergence(*args)

        return slop, kld1 + kld2
Example #37
 def __call__(self, x, test=False):
     if test == True:
         return x
     xp = cuda.get_array_module(x.data)
     ln_var = math.log(self.std ** 2)
     noise = F.gaussian(Variable(xp.zeros_like(x.data)), Variable(xp.full_like(x.data, ln_var)))
     return x + noise
Example #38
    def generate_image(self, v, r):
        xp = cuda.get_array_module(v)

        batch_size = v.shape[0]
        h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
            batch_size, xp)
        v = cf.reshape(v, v.shape[:2] + (1, 1))

        for t in range(self.generation_steps):
            generation_core = self.get_generation_core(t)
            generation_prior = self.get_generation_prior(t)
            generation_upsampler = self.get_generation_upsampler(t)

            mean_z_p, ln_var_z_p = generation_prior.compute_parameter(h_t_gen)
            z_t = cf.gaussian(mean_z_p, ln_var_z_p)

            h_next_gen, c_next_gen = generation_core(h_t_gen, c_t_gen, z_t, v,
                                                     r, u_t)

            u_t = u_t + generation_upsampler(h_next_gen)
            h_t_gen = h_next_gen
            c_t_gen = c_next_gen

        mean_x = self.map_u_x(u_t)
        return mean_x.data
Example #39
	def encode_x_y_distribution(self, x, test=False, softmax=True):
		x = self.to_variable(x)
		mean, ln_var = self.q_a_x(x, test=test)
		a = F.gaussian(mean, ln_var)
		y = self.q_y_ax(a, x, test=test)
		if softmax:
			return F.softmax(y)
		return y
Example #40
 def free_energy(self,x):
     #return -(free energy)
     enc_mu, enc_log_sigma_2 = self.encode(x)
     kl = F.gaussian_kl_divergence(enc_mu,enc_log_sigma_2)
     z = F.gaussian(enc_mu,enc_log_sigma_2)
     dec_mu = self.decode(z)
     nll = F.bernoulli_nll(x,dec_mu)
     return nll+kl
Example #41
    def check_forward(self, m_data, v_data):
        m = chainer.Variable(m_data)
        v = chainer.Variable(v_data)
        n = functions.gaussian(m, v)

        # Only checks dtype and shape because its result contains noise
        self.assertEqual(n.dtype, numpy.float32)
        self.assertEqual(n.shape, m.shape)
Example #42
	def __call__(self, x):
		if chainer.config.train == False:
			return x
		data = x.data if isinstance(x, chainer.Variable) else x
		xp = cuda.get_array_module(data)
		ln_var = math.log(self.std ** 2)
		noise = functions.gaussian(xp.full_like(data, self.mean), xp.full_like(data, ln_var))
		return x + noise
Example #43
 def encode(self, bow):
     """ Convert the bag of words vector of shape (n_docs, n_vocab)
     into latent mean and log variance vectors.
     """
     lam = F.relu(self.l1(bow))
     pi = F.relu(self.l2(lam))
     mu, log_sigma = F.split_axis(self.mu_logsigma(pi), 2, 1)
     sample = F.gaussian(mu, log_sigma)
     loss = F.gaussian_kl_divergence(mu, log_sigma)
     return sample, loss
Example #44
 def predict(self, x):
     a_mean, a_ln_var = split(self.q_a_given_x(x))
     y_pred = chainer.Variable(
         self.xp.zeros((len(x.data), self.y_dim), dtype=np.float32),
         volatile='auto')
     for _ in six.moves.range(self.sampling_predict):
         a = F.gaussian(a_mean, a_ln_var)
         y_pred += F.softmax(self.q_y_given_a_x(a, x))
     y_pred /= self.sampling_predict
     return y_pred
Example #45
 def __call__(self, x):
     xp = self.encoder.xp
     x = Variable(xp.asarray(x))
     zm, zv = self.encoder((x,))
     z = F.gaussian(zm, zv)
     mean, ln_var = self.decoder((z,))
     kl_loss = F.gaussian_kl_divergence(zm, zv)
     nll_loss = F.gaussian_nll(x, mean, ln_var)
     loss = kl_loss + nll_loss
     return loss
Example #46
    def __call__(self, x):
        x = Variable(x)
        start = time.time()
        zm, zv = self.encoder((x,))
        z = F.gaussian(zm, zv)
        y = self.decoder((z,))[0]
        kl_loss = F.gaussian_kl_divergence(zm, zv)
        nll_loss = F.bernoulli_nll(x, y)

        loss = kl_loss + nll_loss
        return loss
Example #47
 def lf(x):
     mu, ln_var = self.encode(x)
     batchsize = len(mu.data)
     # reconstruction loss
     rec_loss = 0
     for l in six.moves.range(k):
         z = F.gaussian(mu, ln_var)
         rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
             / (k * batchsize)
     self.rec_loss = rec_loss
     self.loss = self.rec_loss + \
         C * gaussian_kl_divergence(mu, ln_var) / batchsize
     return self.loss
Example #48
    def forward(self, x, l, train, action):
        if self.xp == np:
            loc = l.data
        else:
            loc = self.xp.asnumpy(l.data)
        margin = self.g_size/2
        loc = (loc+1)*0.5*(self.in_size-self.g_size+1) + margin
        loc = np.clip(loc, margin, self.in_size-margin)
        loc = np.floor(loc).astype(np.int32)

        # Retina Encoding
        hx = crop(x, loc=loc, size=self.g_size)
        hx = F.relu(self.emb_x(hx))

        # Location Encoding
        hl = F.relu(self.emb_l(l))

        # Glimpse Net
        g = F.relu(self.fc_lg(hl) + self.fc_xg(hx))

        # Core Net
        h = self.core_lstm(g) #  LSTM(g + h_t-1)

        # Location Net
        l = F.tanh(self.fc_hl(h))

        if train:
            # sampling location l
            s = F.gaussian(mean=l, ln_var=self.ln_var)
            s = F.clip(s, -1., 1.)

            # location policy
            l1, l2 = F.split_axis(l, indices_or_sections=2, axis=1)
            s1, s2 = F.split_axis(s, indices_or_sections=2, axis=1)
            norm = (s1-l1)*(s1-l1) + (s2-l2)*(s2-l2)
            ln_p = 0.5 * norm / self.var
            ln_p = F.reshape(ln_p, (-1,))

        if action:
            # Action Net
            y = self.fc_ha(h)

            if train:
                return s, ln_p, y
            else:
                return l, None, y
        else:
            if train:
                return s, ln_p, None
            else:
                return l, None, None
Example #49
    def check_backward(self, m_data, v_data, y_grad):
        m = chainer.Variable(m_data)
        v = chainer.Variable(v_data)
        y = functions.gaussian(m, v)
        self.assertEqual(y.data.dtype, numpy.float32)
        y.grad = y_grad
        y.backward()

        func = y.creator
        f = lambda: func.forward((m.data, v.data))
        gm, gv = gradient_check.numerical_grad(f, (m.data, v.data), (y.grad,))

        gradient_check.assert_allclose(gm, m.grad, atol=1e-4, rtol=1e-3)
        gradient_check.assert_allclose(gv, v.grad, atol=1e-4, rtol=1e-3)
Example #50
    def test_forward(self, backend_config):
        # TODO(niboshi): Support it
        if backend_config.use_chainerx and self.dtype == numpy.float16:
            raise unittest.SkipTest('ChainerX does not support float16')

        m_data, v_data = backend_config.get_array((self.m, self.v))

        m = chainer.Variable(m_data)
        v = chainer.Variable(v_data)

        # Call forward without eps and retrieve it
        n1, eps = functions.gaussian(m, v, return_eps=True)

        self.assertIsInstance(eps, backend_config.xp.ndarray)
        self.assertEqual(n1.dtype, self.dtype)
        self.assertEqual(n1.shape, m.shape)
        self.assertEqual(eps.dtype, self.dtype)
        self.assertEqual(eps.shape, m.shape)

        # Call again with retrieved eps
        n2 = functions.gaussian(m, v, eps=eps)
        self.assertEqual(n2.dtype, self.dtype)
        self.assertEqual(n2.shape, m.shape)
        testing.assert_allclose(n1.array, n2.array)
Example #51
 def lf(x):
     mu, ln_var = self.encode(x)
     batchsize = len(mu)
     # reconstruction loss
     rec_loss = 0
     for l in six.moves.range(k):
         z = F.gaussian(mu, ln_var)
         rec_loss += F.bernoulli_nll(x, self.decode(z, sigmoid=False)) \
             / (k * batchsize)
     self.rec_loss = rec_loss
     self.loss = self.rec_loss + \
         beta * gaussian_kl_divergence(mu, ln_var) / batchsize
     chainer.report(
         {'rec_loss': rec_loss, 'loss': self.loss}, observer=self)
     return self.loss
	def forward_one_step_gaussian(self, x, test):
		activate = activations[self.activation_type]

		chain_mean = [x]
		chain_variance = [x]

		# Hidden
		for i in range(self.n_layers - 1):
			u = getattr(self, "layer_mean_%i" % i)(chain_mean[-1])
			if self.apply_batchnorm:
				u = getattr(self, "batchnorm_mean_%i" % i)(u, test=test)
			output = activate(u)
			if self.enable_dropout:
				output = F.dropout(output, train=not test)
			chain_mean.append(output)

			u = getattr(self, "layer_variance_%i" % i)(chain_variance[-1])
			if self.apply_batchnorm:
				u = getattr(self, "batchnorm_variance_%i" % i)(u, test=test)
			output = activate(u)
			if self.enable_dropout:
				output = F.dropout(output, train=not test)
			chain_variance.append(output)


		# Output
		u = getattr(self, "layer_mean_%i" % (self.n_layers - 1))(chain_mean[-1])
		if self.apply_batchnorm and self.apply_batchnorm_to_output:
			u = getattr(self, "batchnorm_mean_%i" % (self.n_layers - 1))(u, test=test)
		if self.output_activation_type is None:
			chain_mean.append(u)
		else:
			chain_mean.append(activations[self.output_activation_type](u))

		u = getattr(self, "layer_variance_%i" % (self.n_layers - 1))(chain_variance[-1])
		if self.apply_batchnorm and self.apply_batchnorm_to_output:
			u = getattr(self, "batchnorm_variance_%i" % (self.n_layers - 1))(u, test=test)
		if self.output_activation_type is None:
			chain_variance.append(u)
		else:
			chain_variance.append(activations[self.output_activation_type](u))

		mean = chain_mean[-1]

		## log(sigma^2)
		ln_var = chain_variance[-1]

		return F.gaussian(mean, ln_var)
Example #53
    def loss_z_dep(self, x, y, a):
        def to_onehot(y, T):
            ret = np.zeros((len(y), T), dtype=np.float32)
            ret[:, y.get()] = 1.0
            return chainer.Variable(self.xp.asarray(ret), volatile='auto')

        y = to_onehot(y.data, self.y_dim)
        z_mean, z_ln_var = split(self.q_z_given_a_y_x(a, y, x))
        z = F.gaussian(z_mean, z_ln_var)
        a_mean, a_ln_var = split(self.p_a_given_z_y_x(z, y, x))
        x_mean, _ = split(self.p_x_given_z_y(z, y))
        zero = chainer.Variable(self.xp.zeros_like(z.data), volatile='auto')

        nll_p_z = F.sum(l.gaussian_nll(z, zero, zero), axis=1)
        nll_p_x_given_z_y = F.sum(l.bernoulli_nll(x, x_mean), axis=1)
        nll_p_a_given_z_y_x = F.sum(
            l.gaussian_nll(a, a_mean, a_ln_var), axis=1)
        nll_q_z_given_a_y_x = F.sum(
            l.gaussian_nll(z, z_mean, z_ln_var), axis=1)

        return (nll_p_z + nll_p_x_given_z_y +
                nll_p_a_given_z_y_x - nll_q_z_given_a_y_x)
Example #54
    def loss_one(self, x, y=None):
        a_mean, a_ln_var = split(self.q_a_given_x(x))
        a = F.gaussian(a_mean, a_ln_var)

        loss = -F.sum(l.gaussian_nll(a, a_mean, a_ln_var))  # nll(q(a|x))
        loss += np.log(self.y_dim)  # nll(p(y))
        if y is None:
            losses_z_dep = []
            for i in six.moves.range(self.y_dim):
                y_ = chainer.Variable(
                    self.xp.full((len(x.data), self.y_dim), i, dtype=np.int32),
                    volatile='auto')
                loss_z_dep = self.loss_z_dep(x, y_, a)
                loss_z_dep = F.reshape(loss_z_dep, (-1, 1))
                losses_z_dep.append(loss_z_dep)
            q_y_given_a_x = F.softmax(self.q_y_given_a_x(a, x))
            loss -= entropy(q_y_given_a_x)  # nll(q(y|a,x))
            # nll(p(z)) + nll(p(x|z, y)) + nll(p(a|x, y, z)) - nll(q(z|a, y, x))
            loss += F.sum(F.concat(losses_z_dep) * q_y_given_a_x)
        else:
            # nll(p(z)) + nll(p(x|z, y)) + nll(p(a|x, y, z)) - nll(q(z|a, y, x))
            loss += F.sum(self.loss_z_dep(x, y, a))
            loss += self.gamma * self.classification_loss(x, y)  # cls loss
        return loss
	def train(self, x, L=1, test=False):
		batchsize = x.data.shape[0]
		z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False)
		loss = 0
		for l in xrange(L):
			# Sample z
			z = F.gaussian(z_mean, z_ln_var)
			# Decode
			x_expectation = self.decoder(z, test=test, apply_f=False)
			# E_q(z|x)[log(p(x|z))]
			loss += self.bernoulli_nll_keepbatch(x, x_expectation)
		if L > 1:
			loss /= L
		# KL divergence
		loss += self.gaussian_kl_divergence_keepbatch(z_mean, z_ln_var)
		loss = F.sum(loss) / batchsize

		self.zero_grads()
		loss.backward()
		self.update()

		if self.gpu:
			loss.to_cpu()
		return loss.data
Example #56
	def encode_x_a(self, x, test=False):
		x = self.to_variable(x)
		mean, ln_var = self.q_a_x(x, test=test)
		return F.gaussian(mean, ln_var)
Example #57
 def f(m, v):
     # In case numerical gradient computation is held in more precise
     # dtype than that of backward computation, cast the eps to reuse
     # before the numerical computation.
     eps_ = eps.astype(m.dtype)
     return functions.gaussian(m, v, eps=eps_)
	def compute_lower_bound_loss(self, labeled_x, labeled_y, label_ids, unlabeled_x, test=False):

		def lower_bound(log_px_zy, log_py, log_pz, log_qz_xy):
			lb = log_px_zy + log_py + log_pz - log_qz_xy
			return lb

		# _l: labeled
		# _u: unlabeled
		batchsize_l = labeled_x.data.shape[0]
		batchsize_u = unlabeled_x.data.shape[0]
		num_types_of_label = labeled_y.data.shape[1]
		xp = self.xp

		### Lower bound for labeled data ###
		# Compute eq.6 -L(x,y)
		z_mean_l, z_ln_var_l = self.encoder_xy_z(labeled_x, labeled_y, test=test, apply_f=False)
		z_l = F.gaussian(z_mean_l, z_ln_var_l)
		log_px_zy_l = self.log_px_zy(labeled_x, z_l, labeled_y, test=test)
		log_py_l = self.log_py(labeled_y, test=test)
		if False:
			log_pz_l = self.log_pz(z_l, z_mean_l, z_ln_var_l, test=test)
			log_qz_xy_l = self.log_qz_xy(z_l, z_mean_l, z_ln_var_l, test=test)
			lower_bound_l = lower_bound(log_px_zy_l, log_py_l, log_pz_l, log_qz_xy_l)
		else:
			lower_bound_l = log_px_zy_l + log_py_l - self.gaussian_kl_divergence_keepbatch(z_mean_l, z_ln_var_l)

		if batchsize_u > 0:
			### Lower bound for unlabeled data ###
			# To marginalize y, we repeat unlabeled x, and construct a target (batchsize_u * num_types_of_label) x num_types_of_label
			# Example of n-dimensional x and target matrix for a 3 class problem and batch_size=2.
			#         unlabeled_x_ext                 y_ext
			#  [[x0[0], x0[1], ..., x0[n]]         [[1, 0, 0]
			#   [x1[0], x1[1], ..., x1[n]]          [1, 0, 0]
			#   [x0[0], x0[1], ..., x0[n]]          [0, 1, 0]
			#   [x1[0], x1[1], ..., x1[n]]          [0, 1, 0]
			#   [x0[0], x0[1], ..., x0[n]]          [0, 0, 1]
			#   [x1[0], x1[1], ..., x1[n]]]         [0, 0, 1]]

			unlabeled_x_ext = xp.zeros((batchsize_u * num_types_of_label, unlabeled_x.data.shape[1]), dtype=xp.float32)
			y_ext = xp.zeros((batchsize_u * num_types_of_label, num_types_of_label), dtype=xp.float32)
			for n in xrange(num_types_of_label):
				y_ext[n * batchsize_u:(n + 1) * batchsize_u,n] = 1
				unlabeled_x_ext[n * batchsize_u:(n + 1) * batchsize_u] = unlabeled_x.data
			y_ext = Variable(y_ext)
			unlabeled_x_ext = Variable(unlabeled_x_ext)

			# Compute eq.6 -L(x,y) for unlabeled data
			z_mean_u_ext, z_mean_ln_var_u_ext = self.encoder_xy_z(unlabeled_x_ext, y_ext, test=test, apply_f=False)
			z_u_ext = F.gaussian(z_mean_u_ext, z_mean_ln_var_u_ext)
			log_px_zy_u = self.log_px_zy(unlabeled_x_ext, z_u_ext, y_ext, test=test)
			log_py_u = self.log_py(y_ext, test=test)
			if False:
				log_pz_u = self.log_pz(z_u_ext, z_mean_u_ext, z_mean_ln_var_u_ext, test=test)
				log_qz_xy_u = self.log_qz_xy(z_u_ext, z_mean_u_ext, z_mean_ln_var_u_ext, test=test)
				lower_bound_u = lower_bound(log_px_zy_u, log_py_u, log_pz_u, log_qz_xy_u)
			else:
				lower_bound_u = log_px_zy_u + log_py_u - self.gaussian_kl_divergence_keepbatch(z_mean_u_ext, z_mean_ln_var_u_ext)

			# Compute eq.7 sum_y{q(y|x){-L(x,y) + H(q(y|x))}}
			# Let LB(xn, y) be the lower bound for an input image xn and a label y (y = 0, 1, ..., 9).
			# Let bs be the batchsize.
			# 
			# lower_bound_u is a vector and it looks like...
			# [LB(x0,0), LB(x1,0), ..., LB(x_bs,0), LB(x0,1), LB(x1,1), ..., LB(x_bs,1), ..., LB(x0,9), LB(x1,9), ..., LB(x_bs,9)]
			# 
			# After reshaping. (axis 1 corresponds to label, axis 2 corresponds to batch)
			# [[LB(x0,0), LB(x1,0), ..., LB(x_bs,0)],
			#  [LB(x0,1), LB(x1,1), ..., LB(x_bs,1)],
			#                   .
			#                   .
			#                   .
			#  [LB(x0,9), LB(x1,9), ..., LB(x_bs,9)]]
			# 
			# After transposing. (axis 1 corresponds to batch)
			# [[LB(x0,0), LB(x0,1), ..., LB(x0,9)],
			#  [LB(x1,0), LB(x1,1), ..., LB(x1,9)],
			#                   .
			#                   .
			#                   .
			#  [LB(x_bs,0), LB(x_bs,1), ..., LB(x_bs,9)]]
			lower_bound_u = F.transpose(F.reshape(lower_bound_u, (num_types_of_label, batchsize_u)))
			
			y_distribution = self.encoder_x_y(unlabeled_x, test=test, softmax=True)
			lower_bound_u = y_distribution * (lower_bound_u - F.log(y_distribution + 1e-6))

			loss_labeled = -F.sum(lower_bound_l) / batchsize_l
			loss_unlabeled = -F.sum(lower_bound_u) / batchsize_u
			loss = loss_labeled + loss_unlabeled
		else:
			loss_unlabeled = None
			loss_labeled = -F.sum(lower_bound_l) / batchsize_l
			loss = loss_labeled

		return loss, loss_labeled, loss_unlabeled
	def __call__(self, z, y, test=False, apply_f=False):
		mean, ln_var = self.forward_one_step(z, y, test=test, apply_f=False)
		if apply_f:
			return F.gaussian(mean, ln_var)
		return mean, ln_var
Example #60
	def decode_yz_a(self, y, z, test=False):
		y = self.to_variable(y)
		z = self.to_variable(z)
		mean, ln_var = self.p_a_yz(y, z, test=test)
		return F.gaussian(mean, ln_var)