Example #1
    def _get_elbo_label(self, inp, tgt, msk, y, args):
        """ Build encoder and decoders """
        xlen = tf.to_int32(tf.reduce_sum(msk, axis=1))
        enc_state = self._create_encoder(
                tgt,
                seqlen=xlen,
                scope_name='enc',
                args=args)

        # posterior q(z | x, y): parameterized from the label embedding and the
        # encoder state; the prior is a fixed Normal, and the analytic KL term
        # is summed over the latent dimensions
        with tf.variable_scope('latent'):
            y_enc_in = tf.contrib.layers.fully_connected(y, args.dim_z, scope='y_enc_in')
            pst_in = tf.concat([y_enc_in, enc_state], axis=1)
            mu_pst = tf.contrib.layers.fully_connected(pst_in, args.dim_z, tf.nn.tanh,
                    scope='mu_posterior')
            logvar_pst = tf.contrib.layers.fully_connected(pst_in, args.dim_z, tf.nn.tanh,
                    scope='logvar_posterior')
            mu_pri = tf.zeros_like(mu_pst)
            logvar_pri = tf.ones_like(logvar_pst)
            dist_pri = tf.contrib.distributions.Normal(mu=mu_pri, sigma=tf.exp(logvar_pri))
            dist_pst = tf.contrib.distributions.Normal(mu=mu_pst, sigma=tf.exp(logvar_pst))
            kl_loss = tf.contrib.distributions.kl(dist_pst, dist_pri)
            kl_loss = tf.reduce_sum(kl_loss, axis=1)

        # draw z from the approximate posterior at training time and from the
        # prior otherwise; SampleValue(stop_gradient=False) keeps the samples
        # differentiable w.r.t. the posterior parameters
        with st.value_type(st.SampleValue(stop_gradient=False)):
            z_st_pri = st.StochasticTensor(dist_pri, name='z_pri')
            z_st_pst = st.StochasticTensor(dist_pst, name='z_pst')
            z = smart_cond(self.is_training, lambda: z_st_pst, lambda: z_st_pri)

        z_ext = tf.contrib.layers.fully_connected(tf.reshape(z, [-1, args.dim_z]), args.num_units, scope='extend_z')
        xlen = tf.to_int32(tf.reduce_sum(msk, axis=1))
        outs, proj, dec_func, cell  = self._create_decoder(
                inp,
                seqlen=xlen,
                label_oh=y,
                init_state=z_ext,
                scope_name='dec',
                args=args)

        # build loss layers
        recons_loss = self._create_softmax_layer(
                proj=proj,
                dec_outs=outs,
                targets=tgt,
                weights=msk,
                scope_name='loss',
                args=args)
        
        return recons_loss, kl_loss
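The excerpt above depends on encoder, decoder, and softmax helpers defined elsewhere in its class. As a rough, self-contained sketch of the same prior/posterior StochasticTensor pattern, assuming a TF 1.x release where tf.contrib.bayesflow is available and Normal/kl still take the older mu/sigma keywords (as in the excerpt), with enc_state, dim_z, and is_training as placeholder names, and with the log-variance converted to a scale via exp(0.5 * logvar):

import tensorflow as tf
from tensorflow.contrib.bayesflow import stochastic_tensor as st

dim_z = 16
enc_state = tf.placeholder(tf.float32, [None, 64])  # stand-in for the encoder output
is_training = tf.placeholder(tf.bool, [])

# posterior parameters predicted from the encoder state
mu_pst = tf.contrib.layers.fully_connected(enc_state, dim_z, tf.nn.tanh, scope='mu_pst')
logvar_pst = tf.contrib.layers.fully_connected(enc_state, dim_z, tf.nn.tanh, scope='logvar_pst')

# standard-normal prior with the same batch shape as the posterior
dist_pri = tf.contrib.distributions.Normal(mu=tf.zeros_like(mu_pst), sigma=tf.ones_like(mu_pst))
dist_pst = tf.contrib.distributions.Normal(mu=mu_pst, sigma=tf.exp(0.5 * logvar_pst))

# analytic KL(q || p), summed over the latent dimensions
kl_loss = tf.reduce_sum(tf.contrib.distributions.kl(dist_pst, dist_pri), axis=1)

# Normal samples are reparameterized, so gradients flow back into mu_pst / logvar_pst
with st.value_type(st.SampleValue(stop_gradient=False)):
    z_pst = st.StochasticTensor(dist_pst, name='z_pst')
    z_pri = st.StochasticTensor(dist_pri, name='z_pri')

# sample from the posterior while training, from the prior at generation time
z = tf.cond(is_training, lambda: z_pst.value(), lambda: z_pri.value())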
Example #2
    def get_loss_u_sample(self, args):
        with tf.variable_scope(args.log_prefix, reuse=True):
            """ unlabel CLASSIFICATION """
            self.logits_u, weights_u = self._create_rnn_classifier(
                self.tgt_u_plh,
                self.msk_u_plh,
                keep_rate=args.keep_rate,
                scope_name='clf',
                args=args)
            self.predict_u = tf.nn.softmax(self.logits_u)
            """ unlabel CVAE """
            with st.value_type(st.SampleValue(stop_gradient=True)):
                y_st = st.StochasticTensor(
                    tf.contrib.distributions.Categorical(p=self.predict_u),
                    name='dist_y',
                    loss_fn=sge.get_score_function_with_baseline())
                recons_loss_u_s, kl_loss_u_s = self._get_elbo_label(
                    self.inp_u_plh, self.tgt_u_plh, self.msk_u_plh, y_st, args)
                recons_loss_u_s = recons_loss_u_s * self.msk_u_plh

                # routing_loss for sampling-based classifier
                if args.use_weights:
                    self._logger.info('Use Reweighting Approach')
                    if args.use_binaryweights:
                        self._logger.info('Use Binary Reweighting Approach')
                        weights_u = tf.cast(
                            tf.greater(
                                weights_u, 1 / tf.reduce_sum(
                                    self.msk_u_plh, axis=1)[:, None, None]),
                            tf.float32)
                    routing_loss = tf.reduce_sum(
                        recons_loss_u_s * self.msk_u_plh * weights_u[:, :, 0],
                        axis=1)
                    routing_loss = routing_loss / tf.reduce_sum(
                        weights_u[:, :, 0], axis=1)
                else:
                    routing_loss = tf.reduce_sum(recons_loss_u_s *
                                                 self.msk_u_plh,
                                                 axis=1)
                    routing_loss = routing_loss / tf.reduce_sum(self.msk_u_plh,
                                                                axis=1)
                routing_loss += kl_loss_u_s * self.kl_w

                # loss for the generator/decoder
                loss_u_of_gen = tf.reduce_sum(recons_loss_u_s, axis=1)
                loss_u_of_gen += kl_loss_u_s * self.kl_w

        with tf.variable_scope(args.log_prefix, reuse=False):
            # surrogate loss: backpropagates routing_loss through the sampled
            # label y_st via the score-function estimator
            surrogate_loss = y_st.loss(routing_loss)
            # entropy-style regularizer on the unlabeled predictions (the
            # softmax probabilities are fed in as both targets and logits)
            self.entropy_u = tf.losses.softmax_cross_entropy(
                self.predict_u, self.predict_u)

        return tf.reduce_mean(surrogate_loss) + tf.reduce_mean(
            loss_u_of_gen) - self.entropy_u
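The key trick in Example #2 is treating the sampled label as a StochasticTensor with a score-function (REINFORCE) gradient estimator, then calling .loss(...) on the downstream loss to get a surrogate training term. A minimal sketch of just that mechanism, assuming the same TF 1.x contrib APIs as the excerpt; the classifier logits and the downstream loss are stand-ins:

import tensorflow as tf
from tensorflow.contrib.bayesflow import stochastic_tensor as st
from tensorflow.contrib.bayesflow import stochastic_gradient_estimators as sge

logits = tf.placeholder(tf.float32, [None, 10])  # stand-in classifier logits
probs = tf.nn.softmax(logits)

# a Categorical draw is not reparameterizable, so gradients w.r.t. `probs`
# come from the score-function (REINFORCE) estimator with a baseline
with st.value_type(st.SampleValue(stop_gradient=True)):
    y_st = st.StochasticTensor(
        tf.contrib.distributions.Categorical(p=probs),
        name='dist_y',
        loss_fn=sge.get_score_function_with_baseline())

# stand-in for the per-example loss computed from the sampled label
# (in Example #2 this is the reconstruction + KL "routing" loss)
downstream_loss = tf.to_float(y_st.value())

# surrogate term whose gradient implements the REINFORCE estimate
surrogate = y_st.loss(downstream_loss)
total_loss = tf.reduce_mean(surrogate) + tf.reduce_mean(downstream_loss)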
Example #3
def variational_autoencoder(features,
                            n_latent_dim=2,
                            hidden_units=[500, 500],
                            normalizing_flow='identity',
                            flow_n_iter=2,
                            kl_weight=1.0,
                            random_state=123):
    features = tensor_utils.to_tensor(features, dtype=tf.float32)
    kl_weight = tensor_utils.to_tensor(kl_weight, dtype=tf.float32)

    n_features = tensor_utils.get_shape(features)[1]
    with tf.variable_scope('inference_network'):
        q_mu, q_sigma = ops.gaussian_inference_network(
            x=features, n_latent_dim=n_latent_dim, hidden_units=hidden_units)
        #q_mu, q_chol = ops.mvn_inference_network(x=features,
        #                                         n_latent_dim=n_latent_dim,
        #                                         hidden_units=hidden_units)

    # set up the latent variables
    with tf.variable_scope('latent_samples'):
        with st.value_type(st.SampleValue()):
            q_z = st.StochasticTensor(dist=distributions.Normal(mu=q_mu,
                                                                sigma=q_sigma),
                                      name='q_z')
            #q_z = st.StochasticTensor(
            #    dist=distributions.MultivariateNormalCholesky(
            #        mu=q_mu, chol=q_chol),
            #        name='q_z')

        # transform the sample to a more complex density by performing
        # a normalizing flow transformation
        norm_flow = flow_lib.get_flow(normalizing_flow,
                                      n_iter=flow_n_iter,
                                      random_state=random_state)
        q_z_trans, log_det_jac = norm_flow.transform(q_z, features=features)

    # set up the priors
    with tf.variable_scope('prior'):
        prior = distributions.Normal(mu=np.zeros(n_latent_dim,
                                                 dtype=np.float32),
                                     sigma=np.ones(n_latent_dim,
                                                   dtype=np.float32))

    with tf.variable_scope('generative_network'):
        p_x_given_z = ops.bernoulli_generative_network(
            z=q_z_trans, hidden_units=hidden_units, n_features=n_features)

    # set up elbo
    log_likelihood = tf.reduce_sum(p_x_given_z.log_pmf(features), 1)
    kl = tf.reduce_sum(distributions.kl(q_z.distribution, prior), 1)
    neg_elbo = -tf.reduce_mean(log_likelihood + log_det_jac - kl_weight * kl,
                               0)

    return q_mu, tf.identity(neg_elbo, name='neg_elbo')
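Example #3 already returns a scalar neg_elbo, so, assuming the project's helper modules (tensor_utils, ops, flow_lib) are importable, training reduces to minimizing it. A hypothetical usage sketch:

# binarized inputs, e.g. flattened 28x28 images
x = tf.placeholder(tf.float32, [None, 784])
q_mu, neg_elbo = variational_autoencoder(x, n_latent_dim=2, hidden_units=[500, 500])
train_op = tf.train.AdamOptimizer(1e-3).minimize(neg_elbo)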
Example #4
    def _build_latent(self):
        config = self.config

        hinputs = self.encoder_logits

        self.latent_mu = fc_layer(hinputs, config.latent_size,
                                  activation_fn=None)

        # Softplus keeps the standard deviation positive (exp would also work).
        self.latent_sd = tf.nn.softplus(fc_layer(
            hinputs, config.latent_size, activation_fn=None))

        if not config.train_encoder:
            self.latent_mu = tf.stop_gradient(self.latent_mu)
            self.latent_sd = tf.stop_gradient(self.latent_sd)

        with st.value_type(st.SampleValue()):
            self.latent_posterior_dist = distributions.Normal(
                self.latent_mu,
                self.latent_sd)

            if config.use_variational:
                self.latent = \
                    st.StochasticTensor(self.latent_posterior_dist)
            else:
                self.latent = self.latent_mu

            # Optionally center the prior at the posterior mean, so the KL
            # term only penalizes the posterior's spread.
            if config.use_posterior_mu:
                prior_mu = self.latent_mu
            else:
                prior_mu = tf.zeros_like(self.latent_mu, dtype=tf.float32)

            prior_sd = tf.ones_like(self.latent_sd, dtype=tf.float32) * \
                config.latent_prior_sd

            self.latent_prior_dist = distributions.Normal(prior_mu, prior_sd)
            self.latent_prior = st.StochasticTensor(self.latent_prior_dist)
Example #5
    def _get_elbo_label(self, inp, tgt, msk, label, args):
        """ Build encoder and decoders """
        xlen = tf.to_int32(tf.reduce_sum(msk, axis=1))
        enc_state = self._create_encoder(tgt,
                                         seqlen=xlen,
                                         scope_name='enc',
                                         args=args)
        enc_state = tf.nn.dropout(enc_state, self.keep_prob_plh)
        enc_state = tf.contrib.layers.fully_connected(
            enc_state,
            num_outputs=args.num_units,
            activation_fn=None,
            scope='x_to_a')
        enc_state = tf.contrib.layers.batch_norm(
            enc_state,
            center=True,
            scale=True,
            is_training=self.is_training_plh,
            scope='bn_a')
        enc_state = tf.tanh(enc_state)
        enc_state = tf.nn.dropout(enc_state, self.keep_prob_plh)

        # one-hot label embedding; conditions both the posterior and the decoder
        label_oh = tf.gather(tf.eye(args.num_classes), label)
        with tf.variable_scope('latent'):
            y_enc_in = tf.contrib.layers.fully_connected(label_oh,
                                                         args.dim_z,
                                                         scope='y_enc_in')
            y_enc_in = tf.nn.dropout(y_enc_in, self.keep_prob_plh)
            pst_in = tf.concat([y_enc_in, enc_state], axis=1)
            pst_in = tf.contrib.layers.fully_connected(pst_in,
                                                       args.num_units,
                                                       None,
                                                       scope='pst_in_dense')
            pst_in = tf.contrib.layers.batch_norm(
                pst_in,
                center=True,
                scale=True,
                is_training=self.is_training_plh,
                scope='pst_in_bn')
            pst_in = tf.tanh(pst_in)
            pst_in = tf.nn.dropout(pst_in, self.keep_prob_plh)
            mu_pst = tf.contrib.layers.fully_connected(pst_in,
                                                       args.dim_z,
                                                       tf.nn.tanh,
                                                       scope='mu_posterior')
            logvar_pst = tf.contrib.layers.fully_connected(
                pst_in, args.dim_z, tf.nn.tanh, scope='logvar_posterior')
            mu_pri = tf.zeros_like(mu_pst)
            logvar_pri = tf.ones_like(logvar_pst)
            dist_pri = tf.contrib.distributions.Normal(
                mu=mu_pri, sigma=tf.exp(logvar_pri))
            dist_pst = tf.contrib.distributions.Normal(
                mu=mu_pst, sigma=tf.exp(logvar_pst))
            kl_loss = tf.contrib.distributions.kl(dist_pst, dist_pri)
            kl_loss = tf.reduce_sum(kl_loss, axis=1)

        with st.value_type(st.SampleValue(stop_gradient=False)):
            z_st_pri = st.StochasticTensor(dist_pri, name='z_pri')
            z_st_pst = st.StochasticTensor(dist_pst, name='z_pst')
            z = smart_cond(self.is_training_plh, lambda: z_st_pst,
                           lambda: z_st_pri)

        z_ext = tf.contrib.layers.fully_connected(tf.reshape(
            z, [-1, args.dim_z]),
                                                  args.num_units,
                                                  scope='extend_z')
        z_ext = tf.nn.dropout(z_ext, self.keep_prob_plh)
        yz = tf.concat([z_ext, label_oh], axis=1)
        yz = tf.contrib.layers.fully_connected(yz,
                                               args.num_units,
                                               None,
                                               scope='yz_dense')
        yz = tf.contrib.layers.batch_norm(yz,
                                          center=True,
                                          scale=True,
                                          is_training=self.is_training_plh,
                                          scope='yz_bn')
        yz = tf.tanh(yz)
        yz = tf.nn.dropout(yz, self.keep_prob_plh)
        xlen = tf.to_int32(tf.reduce_sum(msk, axis=1))
        outs, proj, dec_func, cell = self._create_decoder(
            inp,
            mask=msk,
            label_oh=label_oh,
            init_state=yz,  #tf.contrib.rnn.LSTMStateTuple(yz, yz),
            scope_name='dec',
            args=args)
        outs = tf.nn.dropout(outs, self.keep_prob_plh)

        # build loss layers
        recons_loss = self._create_softmax_layer(proj=proj,
                                                 dec_outs=outs,
                                                 targets=tgt,
                                                 weights=msk,
                                                 scope_name='loss',
                                                 args=args)
        recons_loss = recons_loss * msk

        return recons_loss, kl_loss
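The per-token reconstruction loss and per-example KL returned here are combined by the caller. A typical combination, mirroring the KL annealing weight kl_w used in Example #2; the tensors below are stand-ins with the same shapes as the excerpt's outputs:

import tensorflow as tf

recons_loss = tf.placeholder(tf.float32, [None, 30])  # per-token reconstruction loss (already masked)
kl_loss = tf.placeholder(tf.float32, [None])          # per-example KL term
kl_w = tf.placeholder(tf.float32, [])                 # KL annealing weight

loss_l = tf.reduce_sum(recons_loss, axis=1) + kl_w * kl_loss  # per-example loss
loss_l = tf.reduce_mean(loss_l)                               # batch average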