Example #1
def add_noise(data, noise, dataset):
    noise_type = noise['noise_type']
    if noise_type in ['None', 'none', None]:
        return data
    if noise_type == 'data':
        noise_type = 'bitflip' if dataset['binary'] else 'masked_uniform'

    with tf.name_scope('input_noise'):
        shape = tf.stack([
            s.value if s.value is not None else tf.shape(data)[i]
            for i, s in enumerate(data.get_shape())
        ])

        if noise_type == 'bitflip':
            noise_dist = dist.Bernoulli(probs=noise['prob'], dtype=data.dtype)
            n = noise_dist.sample(shape)
            corrupted = data + n - 2 * data * n  # hacky way of implementing (data XOR n)
        elif noise_type == 'masked_uniform':
            noise_dist = dist.Uniform(low=0., high=1.)
            noise_uniform = noise_dist.sample(shape)

            # sample mask
            mask_dist = dist.Bernoulli(probs=noise['prob'], dtype=data.dtype)
            mask = mask_dist.sample(shape)

            # produce output
            corrupted = mask * noise_uniform + (1 - mask) * data
        else:
            raise KeyError('Unknown noise_type "{}"'.format(noise_type))

        corrupted.set_shape(data.get_shape())
        return corrupted
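The "hacky" line above relies on the identity x + n - 2*x*n = x XOR n for values in {0, 1}, so adding Bernoulli noise this way flips each bit with probability noise['prob']. A minimal NumPy sketch (not part of the original snippet) that checks the identity:

import numpy as np

rng = np.random.default_rng(0)
x = rng.integers(0, 2, size=(4, 8))   # binary data
n = rng.integers(0, 2, size=(4, 8))   # Bernoulli noise
corrupted = x + n - 2 * x * n         # arithmetic form of XOR
assert np.array_equal(corrupted, np.bitwise_xor(x, n))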
Example #2
    def __init__(self, n_hidden, steps_bias=0., max_rel_logit_change=np.inf, max_logit_change=np.inf, **kwargs):
        """

        :param n_hidden:
        :param steps_bias:
        :param max_rel_logit_change: float; maximum relative logit change since the previous time-step
        :param kwargs:
        """
        super(StepsPredictor, self).__init__()
        self._n_hidden = n_hidden
        self._steps_bias = steps_bias
        self._max_rel_logit_change = max_rel_logit_change
        self._bernoulli = lambda logits: tfd.Bernoulli(logits=logits, dtype=tf.float32, **kwargs)

        with self._enter_variable_scope():

            if max_logit_change != np.inf and max_rel_logit_change != np.inf:
                raise ValueError('Only one of max_logit_change and max_rel_logit_change can be used!')

            if max_rel_logit_change != np.inf:
                max_rel_logit_change = tf.get_variable('max_rel_logit_change',
                                                       shape=[],
                                                       initializer=tf.constant_initializer(max_rel_logit_change),
                                                       trainable=False)
            self._max_rel_logit_change = max_rel_logit_change

            if max_logit_change != np.inf:
                max_logit_change = tf.get_variable('max_logit_change',
                                                   shape=[],
                                                   initializer=tf.constant_initializer(max_logit_change),
                                                   trainable=False)
            self._max_logit_change = max_logit_change
Example #3
    def output_function(self, state):
        params = dense_layer(state.h3,
                             self.output_units,
                             scope='gmm',
                             reuse=tf.compat.v1.AUTO_REUSE)
        pis, mus, sigmas, rhos, es = self._parse_parameters(params)
        mu1, mu2 = tf.split(mus, 2, axis=1)
        mus = tf.stack([mu1, mu2], axis=2)
        sigma1, sigma2 = tf.split(sigmas, 2, axis=1)

        covar_matrix = [
            tf.square(sigma1), rhos * sigma1 * sigma2, rhos * sigma1 * sigma2,
            tf.square(sigma2)
        ]
        covar_matrix = tf.stack(covar_matrix, axis=2)
        covar_matrix = tf.reshape(
            covar_matrix,
            (self.batch_size, self.num_output_mixture_components, 2, 2))

        mvn = tfd.MultivariateNormalFullCovariance(
            loc=mus, covariance_matrix=covar_matrix)
        b = tfd.Bernoulli(probs=es)
        c = tfd.Categorical(probs=pis)

        sampled_e = b.sample()
        sampled_coords = mvn.sample()
        sampled_idx = c.sample()

        idx = tf.stack([tf.range(self.batch_size), sampled_idx], axis=1)
        coords = tf.gather_nd(sampled_coords, idx)
        return tf.concat([coords, tf.cast(sampled_e, tf.float32)], axis=1)
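For each mixture component the covar_matrix above is the standard 2x2 covariance [[sigma1**2, rho*sigma1*sigma2], [rho*sigma1*sigma2, sigma2**2]]. A small NumPy sketch (illustrative only; the sigma and rho values are made up) of the same construction for one component:

import numpy as np

sigma1, sigma2, rho = 0.5, 2.0, 0.3   # hypothetical component parameters
cov = np.array([[sigma1 ** 2,           rho * sigma1 * sigma2],
                [rho * sigma1 * sigma2, sigma2 ** 2]])
assert np.allclose(cov, cov.T)                # symmetric
assert np.all(np.linalg.eigvalsh(cov) > 0)    # positive definite while |rho| < 1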
Example #4
    def construct_masked_inputs(self):
        """
        Here. we should either define ALL the placeholders we'll ever need, or expect people to subclass.
        Subclassing is probably cleaner.

        Must set fields:
            self.mask
                The mask sample.
            self.network_input:
                The masked input
            self.remaining_input
                The part of the positive input that wasn't masked
        
        """
        masker = ds.Bernoulli(probs=self.keep_prob_ph, dtype=tf.float32)
        mask_shape = [self.batch_size, self.input_dim]
        mask = masker.sample(sample_shape=mask_shape)
        reverse_mask = 1 - mask  # keeps only the positives that are masked out of network_input
        network_input = (self.batch_of_users * mask)
        remaining_input = (self.batch_of_users * reverse_mask)

        number_of_good_items = tf.reduce_sum(self.batch_of_users, axis=-1)
        number_of_unseen_items = tf.reduce_sum(remaining_input, axis=-1)
        number_of_seen_items = tf.reduce_sum(network_input, axis=-1)

        self.mask = mask
        self.network_input = network_input
        self.remaining_input = remaining_input
        self.number_of_good_items = number_of_good_items
        self.number_of_unseen_items = number_of_unseen_items
        self.number_of_seen_items = number_of_seen_items
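As the docstring says, the Bernoulli keep-mask splits each user's positive items into a visible part (network_input) and a held-out part (remaining_input). A toy NumPy sketch of that split (the keep probability and the interaction vector are made up):

import numpy as np

rng = np.random.default_rng(1)
batch_of_users = np.array([[1, 0, 1, 1, 0, 1]], dtype=np.float32)
mask = rng.binomial(1, 0.5, size=batch_of_users.shape).astype(np.float32)
network_input = batch_of_users * mask          # items the model is allowed to see
remaining_input = batch_of_users * (1 - mask)  # held-out positives
assert np.array_equal(network_input + remaining_input, batch_of_users)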
Example #5
    def decode(self, prev_state, prev_input, timestep):
        with tf.variable_scope("loop"):
            if timestep > 0:
                tf.get_variable_scope().reuse_variables()

            # Run the cell on a combination of the previous input and state
            output, state = self.cell(prev_input,prev_state)

            # mask before masked-scores
            position = tf.ones([prev_input.shape[0]]) * timestep
            position = tf.cast(position, tf.int32)

            # Update mask
            self.mask = tf.one_hot(position, self.seq_length)

            # Attention mechanism
            masked_scores = self.attention(self.encoder_output, output)

            # build a Bernoulli from the masked scores (first positional argument is logits) and sample
            prob = distr.Bernoulli(masked_scores)
            sampled_arr = prob.sample()  # [batch_size, seq_length] for just one node

            self.samples.append(sampled_arr)
            self.mask_scores.append(masked_scores)

            if timestep == 0:
                self.first_city = position
                self.first_city_hot = tf.one_hot(self.first_city, self.seq_length)

            # Retrieve decoder's new input
            new_decoder_input = tf.gather(self.h,position)[0]

            return state, new_decoder_input
Example #6
def make_decoder(z, x_shape=(1, 20, 1)):
    '''
    Decoder: p(x|z)
    '''
    net = make_nn(z, 20)
    logits = tf.reshape(net, tf.concat([[-1], x_shape], axis=0))
    return tfd.Independent(tfd.Bernoulli(logits))
Example #7
def _make_decoder(code, data_shape):
    with tf.variable_scope('decoder'):
        x = code
        x = tf.layers.dense(x, 200, tf.nn.relu)
        x = tf.layers.dense(x, 200, tf.nn.relu)
        logit = tf.layers.dense(x, _prod(data_shape))
        logit = tf.reshape(logit, [-1] + data_shape)
        return tfd.Independent(tfd.Bernoulli(logit), 2)
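Examples #6, #7 and #9 wrap the pixel-wise Bernoullis in tfd.Independent so that log_prob sums over the image dimensions instead of returning one value per pixel. A short TensorFlow Probability sketch of the effect of reinterpreted_batch_ndims=2 (runs eagerly under TF2; the shapes are made up):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

logits = tf.zeros([5, 28, 28])                         # batch of 5 toy "images"
pixelwise = tfd.Bernoulli(logits=logits)               # batch_shape=[5, 28, 28]
imagewise = tfd.Independent(pixelwise, reinterpreted_batch_ndims=2)
x = tf.zeros([5, 28, 28])
print(pixelwise.log_prob(x).shape)                     # (5, 28, 28): one value per pixel
print(imagewise.log_prob(x).shape)                     # (5,): summed over each 28x28 image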
Example #8
    def sample(self, n=None):
        if self._bernoulli is None:
            self._bernoulli = tfd.Bernoulli(self._steps_probs)

        sample = self._bernoulli.sample(n)
        sample = tf.cumprod(sample, tf.rank(sample) - 1)
        sample = tf.reduce_sum(sample, -1)
        return sample
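The cumprod/sum pair turns a row of Bernoulli "continue" indicators into a step count: the cumulative product stays 1 until the first 0 and is 0 afterwards, so its sum is the number of leading ones. A NumPy sketch (not from the source) of the same idea:

import numpy as np

flags = np.array([1, 1, 1, 0, 1, 0])   # hypothetical Bernoulli samples, one per step
surviving = np.cumprod(flags)          # [1, 1, 1, 0, 0, 0]
num_steps = surviving.sum()            # 3: steps taken before the first 0
assert num_steps == 3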
Example #9
def make_decoder(code, data_shape):

    x = code
    x = tf.layers.dense(x, hidden, tf.nn.relu)
    x = tf.layers.dense(x, hidden, tf.nn.relu)
    logit = tf.layers.dense(x, np.prod(data_shape))
    logit = tf.reshape(logit, [-1] + data_shape)

    return tfd.Independent(tfd.Bernoulli(logit), 2)
Example #10
    def decode(self, encoder_output):
        # encoder_output is a tensor of size [batch_size, max_length, input_embed]
        with tf.variable_scope('singe_layer_nn'):
            W_l = tf.get_variable('weights_left', [self.input_embed, self.decoder_hidden_dim], initializer=self.initializer)
            W_r = tf.get_variable('weights_right', [self.input_embed, self.decoder_hidden_dim], initializer=self.initializer)
            U = tf.get_variable('U', [self.decoder_hidden_dim], initializer=self.initializer)    # Aggregate across decoder hidden dim

        dot_l = tf.einsum('ijk, kl->ijl', encoder_output, W_l)  #BTBT project the encoder output [batch_siz, var_siz, encode_hidden_dim] to [batch_siz, var_siz, decoder_hidden_dim]
        dot_r = tf.einsum('ijk, kl->ijl', encoder_output, W_r)
        exp_l = tf.expand_dims(dot_l, axis=2)  #BTBT [batch_siz, var_siz, 1, decoder_hid]; the axis argument of expand_dims is the position at which the new dimension is inserted
        exp_r = tf.expand_dims(dot_r, axis=1)  #BTBT [batch_siz, 1, var_siz, decoder_hid]
        tiled_l = tf.tile(exp_l, (1, 1, self.max_length, 1))
        tiled_r = tf.tile(exp_r, (1, self.max_length, 1, 1))

        if self.decoder_activation == 'tanh':    # Original implementation by paper
            final_sum = tf.nn.tanh(tiled_l + tiled_r)
        elif self.decoder_activation == 'relu':
            final_sum = tf.nn.relu(tiled_l + tiled_r)
        elif self.decoder_activation == 'none':    # Without activation function
            final_sum = tiled_l + tiled_r
        else:
            raise NotImplementedError('Current decoder activation is not implemented yet')

        # final_sum is of shape (batch_size, max_length, max_length, decoder_hidden_dim) #BTBT [batch_siz,var_siz,var_siz,decoder_hid]
        logits = tf.einsum('ijkl, l->ijk', final_sum, U)    # Readability

        if self.bias_initial_value is None:    # Randomly initialize the learnable bias
            self.logit_bias = tf.get_variable('logit_bias', [1])
        elif self.use_bias_constant:    # Constant bias
            self.logit_bias =  tf.constant([self.bias_initial_value], tf.float32, name='logit_bias')
        else:    # Learnable bias with initial value
            if self.use_bias: #BTBT [BUGFIX] only initialize the bias when it is actually used
                self.logit_bias = tf.Variable([self.bias_initial_value], tf.float32, name='logit_bias')

        if self.use_bias:    # Bias to control sparsity/density
            logits += self.logit_bias

        self.adj_prob = logits

        for i in range(self.max_length):
            position = tf.ones([encoder_output.shape[0]]) * i
            position = tf.cast(position, tf.int32)

            # Update mask
            self.mask = tf.one_hot(position, self.max_length)

            masked_score = self.adj_prob[:,i,:] - 100000000.*self.mask #BTBT avoid self-loop
            prob = distr.Bernoulli(masked_score)    # first positional argument is treated as logits (use probs= for probabilities)

            sampled_arr = prob.sample()    # [batch_size, seq_length] for just one node

            self.samples.append(sampled_arr)
            self.mask_scores.append(masked_score)
            self.entropy.append(prob.entropy())

        return self.samples, self.mask_scores, self.entropy
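Inside the loop, row i of the adjacency logits has 1e8 subtracted at position i via the one-hot mask, which drives the Bernoulli probability of a self-loop to essentially zero. A NumPy sketch of that masking (the logits are made up):

import numpy as np

logits = np.zeros((4, 4))              # hypothetical adjacency logits for 4 nodes
i = 2
mask = np.eye(4)[i]                    # one_hot(position=i, depth=4)
masked_score = logits[i, :] - 1e8 * mask
print(masked_score)                    # [0, 0, -1e8, 0]; Bernoulli(logits=-1e8) samples 1 with probability ~0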
Example #11
def make_decoder(z, x_shape=(x_dim,)):
    '''
    Decoder: p(x|z)
    '''
    with tf.variable_scope("decoder"):
        net = make_nn(z, x_dim)
        print('decoder net', net)
        logits = tf.reshape(net, tf.concat([[nb_z_samples, -1], x_shape], axis=0))  # For the batch
        print('logits', logits)
        return tfd.Independent(tfd.Bernoulli(logits), reinterpreted_batch_ndims=1)
Example #12
def generative_model(observations, samples, is_training, latent_layer_dims,
                     nn_layers):
    samples = list(reversed(samples))
    latent_layer_dims = list(reversed(latent_layer_dims))

    mu, sigma_sq = generator_net(samples[0], is_training, nn_layers[0],
                                 nn_layers[1], latent_layer_dims[0],
                                 'gaussian')
    mean_list = [mu]
    var_list = [sigma_sq]
    p_lls = []
    p_gen = None

    # reconstruction of training samples
    for i in range(1, len(samples) - 1):
        mu, sigma_sq = generator_net(samples[i], is_training, nn_layers[0],
                                     nn_layers[1], latent_layer_dims[i],
                                     'gaussian')
        p_lls.append(dist.MultivariateNormalDiag(mu, sigma_sq))

        mean_list.append(mu)
        var_list.append(sigma_sq)

    probs = generator_net(samples[-1],
                          is_training,
                          nn_layers[0],
                          nn_layers[1],
                          observations.get_shape().as_list()[1],
                          likelihood='bernoulli')
    p_x = bernoulli_log_likelihood(observations, probs)

    # generation of novel samples
    sample_gen = tf.random_uniform([16], maxval=11, dtype=tf.int32)
    sample_gen = tf.one_hot(sample_gen, 10)
    mu_gen, sigma_sq_gen = generator_net(sample_gen, is_training, nn_layers[0],
                                         nn_layers[1], latent_layer_dims[0],
                                         'gaussian')
    gen_samples = [dist.MultivariateNormalDiag(mu_gen, sigma_sq_gen).sample()]

    for i in range(1, len(latent_layer_dims) - 1):
        mu, sigma_sq = generator_net(samples[i], is_training, nn_layers[0],
                                     nn_layers[1], latent_layer_dims[i],
                                     'gaussian')
        gen_samples.append(dist.MultivariateNormalDiag(mu, sigma_sq).sample())

    probs = generator_net(gen_samples[-1],
                          is_training,
                          nn_layers[0],
                          nn_layers[1],
                          observations.get_shape().as_list()[1],
                          likelihood='bernoulli')
    p_gen = dist.Bernoulli(probs=probs).sample()

    return probs, p_gen, p_x, mean_list, var_list
Example #13
    def _build(self, timestep, previous_presence, *_):
        is_first = tf.cast(tf.equal(timestep, 0), tf.float32)

        if self.discovery:
            logits = 88. * is_first + (1 - is_first) * -88.
        else:
            logits = -88. * is_first + (1 - is_first) * 88.

        logits = logits * tf.ones_like(previous_presence)

        return tfd.Bernoulli(logits=logits, dtype=tf.float32)
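The magic number 88 is close to the largest logit whose exp still fits in float32 (ln(3.4e38) ≈ 88.7), and sigmoid(88) already rounds to 1.0, so the returned Bernoulli is effectively deterministic: presence is forced on at t = 0 in discovery mode and forced off afterwards (and the reverse otherwise). A quick NumPy check (not from the source):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

print(sigmoid(np.float32(88.0)))    # 1.0 in float32: Bernoulli(logits=88) samples 1 almost surely
print(sigmoid(np.float32(-88.0)))   # ~6e-39: Bernoulli(logits=-88) samples 0 almost surely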
Example #14
def bernoulli_generative_network(z, hidden_units, n_features):
    with slim.arg_scope([slim.fully_connected], activation_fn=tf.nn.relu):
        net = slim.stack(z,
                         slim.fully_connected,
                         hidden_units,
                         scope='decoder_network')
        bernoulli_logits = slim.fully_connected(net,
                                                n_features,
                                                activation_fn=None)

        return distributions.Bernoulli(logits=bernoulli_logits)
Example #15
    def _build(self, inputs, hvar_labels, n_samples=10, analytic_kl=True):
        datum_shape = inputs.get_shape().as_list()[1:]
        enc_repr = self._encoder(inputs)

        self.hvar_prior = tfd.ExpRelaxedOneHotCategorical(
            temperature=self._temperature, logits=hvar_labels)
        self.hvar_posterior = tfd.ExpRelaxedOneHotCategorical(
            temperature=self._temperature, logits=self._hvar(enc_repr))
        hvar_sample_shape = ([n_samples]
                             + self.hvar_posterior.batch_shape.as_list()
                             + self.hvar_posterior.event_shape.as_list())
        hvar_sample = tf.reshape(self.hvar_posterior.sample(n_samples),
                                 hvar_sample_shape)

        self.latent_posterior = self._latent_posterior_fn(
            self._loc(enc_repr), self._scale(enc_repr))
        latent_posterior_sample = self.latent_posterior.sample(n_samples)

        joint_sample = tf.concat([hvar_sample, latent_posterior_sample],
                                 axis=-1)

        sample_decoder = snt.BatchApply(self._decoder)
        self.output_distribution = tfd.Independent(
            tfd.Bernoulli(logits=sample_decoder(joint_sample)),
            reinterpreted_batch_ndims=len(datum_shape))

        distortion = -self.output_distribution.log_prob(inputs)
        if analytic_kl and n_samples == 1:
            rate = tfd.kl_divergence(self.latent_posterior, self.latent_prior)
        else:
            rate = (self.latent_posterior.log_prob(latent_posterior_sample) -
                    self.latent_prior.log_prob(latent_posterior_sample))
        hrate = self.hvar_posterior.log_prob(
            hvar_sample) - self.hvar_prior.log_prob(hvar_sample)
        # hrate = tf.Print(hrate, [temperature])
        # hrate = tf.Print(hrate, [hvar_sample], summarize=10)
        # hrate = tf.Print(hrate, [self.hvar_posterior.log_prob(hvar_sample)])
        # hrate = tf.Print(hrate, [self.hvar_prior.log_prob(hvar_sample)])
        # hrate = tf.Print(hrate, [hrate], summarize=10)
        elbo_local = -(rate + hrate + distortion)
        self.elbo = tf.reduce_mean(elbo_local)
        self.importance_weighted_elbo = tf.reduce_mean(
            tf.reduce_logsumexp(elbo_local, axis=0) -
            tf.log(tf.to_float(n_samples)))

        self.hvar_sample = tf.exp(tf.split(hvar_sample, n_samples)[0])
        self.hvar_cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=hvar_labels, logits=tf.split(hvar_sample, n_samples)[0])
        self.hvar_labels = hvar_labels
        self.distortion = distortion
        self.rate = rate
        self.hrate = hrate
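The importance_weighted_elbo above is log-sum-exp of the per-sample ELBOs minus log n, i.e. log(1/n * sum_i exp(elbo_i)), which by Jensen's inequality is at least the plain average. A NumPy sketch of that reduction (the per-sample values are made up):

import numpy as np

elbo_local = np.array([-10.0, -12.0, -11.0])               # hypothetical per-sample ELBOs for one datum
n = len(elbo_local)
iw_elbo = np.log(np.sum(np.exp(elbo_local))) - np.log(n)   # log-mean-exp of the samples
print(iw_elbo, elbo_local.mean())
assert iw_elbo >= elbo_local.mean()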
Example #16
 def __init__(self, feature_ids, probs=None):
     super().__init__(feature_ids)
     if probs is None:
         self.probs = tf.random_uniform([len(feature_ids)],
                                        minval=0,
                                        maxval=1,
                                        dtype=spn_type)
     else:
         self.probs = tf.constant(probs, dtype=spn_type)
     self.probs = tf.Variable(tf.log(self.probs),
                              trainable=True,
                              dtype=spn_type)
     self.distributions = dist.Bernoulli(logits=self.probs)
Example #17
    def __init__(self, region, args, name,
                 given_params=None, num_dims=0):
        super().__init__(name)
        self.local_size = len(region)
        self.args = args
        self.scope = sorted(list(region))
        self.size = args.num_gauss
        self.num_dims = num_dims
        self.np_params = None
        self.params = self.args.param_provider.grab_leaf_parameters(
            self.scope,
            args.num_gauss,
            name=name + "_p")

        self.dist = dists.Bernoulli(logits=self.params)
Example #18
    def __init__(self, region, args, name, given_params=None, p=-0.7):
        super().__init__(name)
        self.local_size = len(region)
        self.args = args
        self.scope = sorted(list(region))
        self.size = args.num_univ_distros

        self.probs = bernoulli_variable_with_weight_decay(
            name + "_bernoulli_params",
            shape=[1, self.local_size, self.size],
            wd=args.gauss_param_l2,
            p=p,
            values=given_params,
        )

        self.dist = dists.Bernoulli(logits=self.probs)
Example #19
    def decode(self, encoder_output):
        # encoder_output is a tensor of size [batch_size, max_length, input_embed]
        with tf.variable_scope('bilinear'):
            W = tf.get_variable('bilinear_weights',
                                [self.input_embed, self.input_embed],
                                initializer=self.initializer)

        logits = tf.einsum('ijk, kn, imn->ijm', encoder_output, W,
                           encoder_output)  # Readability

        if self.bias_initial_value is None:  # Randomly initialize the learnable bias
            self.logit_bias = tf.get_variable('logit_bias', [1])
        elif self.use_bias_constant:  # Constant bias
            self.logit_bias = tf.constant([self.bias_initial_value],
                                          tf.float32,
                                          name='logit_bias')
        else:  # Learnable bias with initial value
            self.logit_bias = tf.Variable([self.bias_initial_value],
                                          tf.float32,
                                          name='logit_bias')

        if self.use_bias:  # Bias to control sparsity/density
            logits += self.logit_bias

        self.adj_prob = logits

        for i in range(self.max_length):
            position = tf.ones([encoder_output.shape[0]]) * i
            position = tf.cast(position, tf.int32)

            # Update mask
            self.mask = tf.one_hot(position, self.max_length)

            masked_score = self.adj_prob[:, i, :] - 100000000. * self.mask
            prob = distr.Bernoulli(masked_score)  # first positional argument is treated as logits (use probs= for probabilities)

            sampled_arr = prob.sample()  # [batch_size, seq_length] for just one node

            self.samples.append(sampled_arr)
            self.mask_scores.append(masked_score)
            self.entropy.append(prob.entropy())

        return self.samples, self.mask_scores, self.entropy
Example #20
    def _build_model(self):
        # input points
        self.x = tf.placeholder(tf.float32, shape=[None, int(np.prod(self.x_dims))], name="X")
        self.noise = tf.placeholder(tf.float32, shape=[None, self.z_dim], name="noise")
        self.p_z = dbns.Normal(loc=tf.zeros_like(self.noise), scale=tf.ones_like(self.noise))

        # encoder
        z_params = self.encoder(self.x)
        z_mu = z_params[:, self.z_dim:]
        z_sigma = tf.exp(z_params[:, :self.z_dim])
        self.q_z = dbns.Normal(loc=z_mu, scale=z_sigma)

        # reparameterization trick
        z = z_mu + tf.multiply(z_sigma, self.p_z.sample())
        # z = self.q_z.sample()

        # decoder
        self.x_hat = self.decoder(z)
        self.p_x_z = dbns.Bernoulli(logits=self.x_hat)

        nll_loss = -tf.reduce_sum(self.x * tf.log(1e-8 + self.x_hat) +
                                  (1 - self.x) * tf.log(1e-8 + 1 - self.x_hat), 1)  # Bernoulli nll
        kl_loss = 0.5 * tf.reduce_sum(tf.square(z_mu) + tf.square(z_sigma) - tf.log(1e-8 + tf.square(z_sigma)) - 1, 1)
        # kl_loss = tf.reduce_sum(dbns.kl_divergence(self.q_z, self.p_z), 1)
        self.loss = tf.reduce_mean(nll_loss + kl_loss)
        self.elbo = -1.0 * tf.reduce_mean(nll_loss + kl_loss)

        # in original paper, lr chosen from {0.01, 0.02, 0.1} depending on first few iters training performance
        optimizer = tf.train.AdagradOptimizer(learning_rate=self.lr)
        self.train_op = optimizer.minimize(self.loss)

        # for sampling
        self.z = self.encoder(self.x, trainable=False, reuse=True)
        self.z_pl = tf.placeholder(tf.float32, shape=[None, self.z_dim])
        self.sample = self.decoder(self.z_pl, trainable=False, reuse=True)

        # tensorboard summaries
        x_img = tf.reshape(self.x, [-1] + self.x_dims)
        tf.summary.image('data', x_img)
        xhat_img = tf.reshape(self.x_hat, [-1] + self.x_dims)
        tf.summary.image('reconstruction', xhat_img)
        tf.summary.scalar('reconstruction_loss', tf.reduce_mean(nll_loss))
        tf.summary.scalar('kl_loss', tf.reduce_mean(kl_loss))
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('elbo', self.elbo)
        self.merged = tf.summary.merge_all()
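The 'reparameterization trick' line draws z as z_mu + z_sigma * eps with eps ~ N(0, 1); this has the same distribution as sampling N(z_mu, z_sigma) directly, but keeps z_mu and z_sigma on the differentiable path. A NumPy sketch with toy values (not from the source):

import numpy as np

rng = np.random.default_rng(0)
mu, sigma = 1.5, 0.3
eps = rng.standard_normal(100_000)
z_reparam = mu + sigma * eps                  # reparameterized sample
z_direct = rng.normal(mu, sigma, 100_000)     # direct sample
print(z_reparam.mean(), z_direct.mean())      # both ~1.5
print(z_reparam.std(), z_direct.std())        # both ~0.3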
Example #21
    def _create_dropout_mask(self,
                             keep_prob,
                             shape,
                             log=True,
                             name="DropoutMask"):
        """Creates a dropout mask with values drawn from a Bernoulli distribution with parameter
        ``keep_prob``.

        Args:
            keep_prob (Tensor): A float ``Tensor`` indicating the probability of keeping an element
                active.
            shape (Tensor): A 1D ``Tensor`` specifying the shape of the mask to sample.
        """
        with tf.name_scope(name):
            mask = tfd.Bernoulli(probs=keep_prob, dtype=conf.dtype, name="DropoutMaskBernoulli")\
                .sample(sample_shape=shape)
            return tf.log(mask) if log else mask
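With log=True the 0/1 Bernoulli mask is returned as tf.log(mask): 0 for kept entries and -inf for dropped ones, the form needed when the mask is added to values that already live in log-space. A tiny NumPy sketch of the two forms (the keep probability is made up):

import numpy as np

rng = np.random.default_rng(0)
mask = rng.binomial(1, 0.8, size=6).astype(float)   # e.g. [1, 1, 0, 1, 1, 1]
with np.errstate(divide="ignore"):
    log_mask = np.log(mask)                          # 0.0 where kept, -inf where dropped
log_values = np.zeros(6)                             # some values already in log-space
print(log_values + log_mask)                         # dropped entries become -inf (probability 0)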
Example #22
def add_noise(data, noise):
    noise_type = noise['noise_type']
    if noise_type in ['None', 'none', None]:
        return data

    with tf.name_scope('input_noise'):
        shape = tf.stack([
            s.value if s.value is not None else tf.shape(data)[i]
            for i, s in enumerate(data.get_shape())
        ])

        if noise_type == 'bitflip':
            noise_dist = dist.Bernoulli(probs=noise['prob'], dtype=data.dtype)
            n = noise_dist.sample(shape)
            corrupted = data + n - 2 * data * n  # hacky way of implementing (data XOR n)
        else:
            raise KeyError('Unknown noise_type "{}"'.format(noise_type))

        corrupted.set_shape(data.get_shape())
        return corrupted
Example #23
    def construct_masked_inputs(self):
        masker = ds.Bernoulli(probs=self.keep_prob_ph, dtype=tf.float32)
        mask_shape = [self.batch_size, self.input_dim]
        mask = masker.sample(sample_shape=mask_shape)
        reverse_mask = 1 - mask  # keeps only the positives that are masked out of network_input
        network_input = (self.batch_of_users[:, :self.input_dim] * mask)
        remaining_input = (self.batch_of_users[:, :self.input_dim] *
                           reverse_mask)

        number_of_good_items = tf.reduce_sum(
            self.batch_of_users[:, :self.input_dim], axis=-1)
        number_of_unseen_items = tf.reduce_sum(remaining_input, axis=-1)
        number_of_seen_items = tf.reduce_sum(network_input, axis=-1)

        self.mask = mask
        self.network_input = tf.concat(
            [network_input, self.batch_of_users[:, self.input_dim:]],
            1)  # masked input (input for actors)
        self.remaining_input = remaining_input  # reverse masked input
        self.number_of_good_items = number_of_good_items  # feature H0
        self.number_of_unseen_items = number_of_unseen_items  # feature H1
        self.number_of_seen_items = number_of_seen_items
Example #24
 def _make_step_posterior(self, presence_prob, presence_logit):  # pylint: disable=unused-variable
     return tfd.Bernoulli(logits=tf.squeeze(presence_logit, -1))
Example #25
 def _test(self, probs, n):
     rv = Bernoulli(probs)
     dist = ds.Bernoulli(probs)
     self.assertEqual(rv.sample(n).shape, dist.sample(n).shape)
Example #26
    def decode(self, encoder_output):
        # encoder_output is a tensor of size [batch_size, max_length, input_embed]
        with tf.variable_scope('ntn'):
            W = tf.get_variable(
                'bilinear_weights',
                [self.input_embed, self.input_embed, self.decoder_hidden_dim],
                initializer=self.initializer)
            W_l = tf.get_variable('weights_left',
                                  [self.input_embed, self.decoder_hidden_dim],
                                  initializer=self.initializer)
            W_r = tf.get_variable('weights_right',
                                  [self.input_embed, self.decoder_hidden_dim],
                                  initializer=self.initializer)
            U = tf.get_variable('U', [self.decoder_hidden_dim],
                                initializer=self.initializer)
            B = tf.get_variable('bias', [self.decoder_hidden_dim],
                                initializer=self.initializer)

        # Compute linear output with shape (batch_size, max_length, max_length, decoder_hidden_dim)
        dot_l = tf.einsum('ijk, kl->ijl', encoder_output, W_l)
        dot_r = tf.einsum('ijk, kl->ijl', encoder_output, W_r)
        tiled_l = tf.tile(tf.expand_dims(dot_l, axis=2),
                          (1, 1, self.max_length, 1))
        tiled_r = tf.tile(tf.expand_dims(dot_r, axis=1),
                          (1, self.max_length, 1, 1))
        linear_sum = tiled_l + tiled_r

        # Compute bilinear product with shape (batch_size, max_length, max_length, decoder_hidden_dim)
        bilinear_product = tf.einsum('ijk, knl, imn->ijml', encoder_output, W,
                                     encoder_output)

        if self.decoder_activation == 'tanh':  # Original implementation by paper
            final_sum = tf.nn.tanh(bilinear_product + linear_sum + B)
        elif self.decoder_activation == 'relu':
            final_sum = tf.nn.relu(bilinear_product + linear_sum + B)
        elif self.decoder_activation == 'none':  # Without activation function
            final_sum = bilinear_product + linear_sum + B
        else:
            raise NotImplementedError(
                'Current decoder activation is not implemented yet')

        logits = tf.einsum('ijkl, l->ijk', final_sum, U)  # Readability

        if self.bias_initial_value is None:  # Randomly initialize the learnable bias
            self.logit_bias = tf.get_variable('logit_bias', [1])
        elif self.use_bias_constant:  # Constant bias
            self.logit_bias = tf.constant([self.bias_initial_value],
                                          tf.float32,
                                          name='logit_bias')
        else:  # Learnable bias with initial value
            self.logit_bias = tf.Variable([self.bias_initial_value],
                                          tf.float32,
                                          name='logit_bias')

        if self.use_bias:  # Bias to control sparsity/density
            logits += self.logit_bias

        self.adj_prob = logits

        for i in range(self.max_length):
            position = tf.ones([encoder_output.shape[0]]) * i
            position = tf.cast(position, tf.int32)

            # Update mask
            self.mask = tf.one_hot(position, self.max_length)

            masked_score = self.adj_prob[:, i, :] - 100000000. * self.mask
            prob = distr.Bernoulli(masked_score)  # first positional argument is treated as logits (use probs= for probabilities)

            sampled_arr = prob.sample()  # [batch_size, seq_length] for just one node

            self.samples.append(sampled_arr)
            self.mask_scores.append(masked_score)
            self.entropy.append(prob.entropy())

        return self.samples, self.mask_scores, self.entropy
Example #27
 def _test(self, probs, n):
     rv = Bernoulli(probs)
     dist = ds.Bernoulli(probs)
     x = rv.sample(n).eval()
     self.assertAllEqual(rv.log_prob(x).eval(), dist.log_prob(x).eval())
Example #28
    def _create_loss_optimizer(self):
        all_variables = dict()
        all_variables['tao'] = tf.Variable(temperature, name="temperature")

        logits_theta = self._encoder_network_theta()
        self.q_theta = tf.nn.softmax(logits_theta)
        log_q_theta = tf.log(self.q_theta + 1e-20)

        self.theta = gumbel_softmax(logits_theta, all_variables['tao'])

        logits_c_1 = self._encoder_network_c()
        logits_c = tf.reshape(logits_c_1, [-1, n_reliability_d1, n_reliability_d2])

        eps = tf.random_normal((n_agents, n_reliability_d1, n_reliability_d2), 0, 1, dtype=tf.float32)
        self.c = logits_c + tf.multiply(eps, variance_q_c_given_M)
        self.c_flatten = tf.reshape(self.c, [-1, n_reliability])

        logits_M_1 = self._decoder_network()
        logits_M = logits_M_1

        decay_theta = tf.Variable(1, trainable=False, dtype=tf.float32)
        self.decay_theta_op = decay_theta.assign(decay_theta * 0.9)

        mean_p_prior_c = tf.tensordot(self.var_Variational_s, self.var_Variational_ctilder_mean, [1, 0])

        p_M = ds.Bernoulli(logits=logits_M)

        kl_theta_tmp = self.q_theta * (log_q_theta - tf.log(self.MV_distribution + 1e-20))
        KL_theta = tf.reduce_sum(kl_theta_tmp)

        kl_c_tmp = 0.5 * (-1 - tf.log(variance_q_c_given_M)
                          + 2 * tf.square(self.c - mean_p_prior_c) / variance_p_c_given_ctilder
                          + variance_q_c_given_M / variance_p_c_given_ctilder)  # - p_prior_log_c.log_prob(self.log_c)
        KL_c = tf.reduce_sum(kl_c_tmp)

        elbo = tf.reduce_sum(tf.multiply(p_M.log_prob(self.M_onehot), M_nan_or_not)) - decay_theta * KL_theta - KL_c

        self.cost = -elbo

        l2_loss = tf.losses.get_regularization_loss()
        self.cost = self.cost + l2_loss

        # Use ADAM optimizer
        self.optimizer = \
            tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)
Example #29
def network_train():
    with tf.variable_scope('data'):
        x = tf.placeholder(tf.float32, [None, 28, 28, 1])
    with tf.name_scope('variational'):
        q_mu, q_sigma = Encoder(x,
                                latent_dim=FLAGS.latent_dim,
                                hidden_size=FLAGS.hidden_size)
        q_z = distributions.Normal(loc=q_mu, scale=q_sigma)
        assert q_z.reparameterization_type == distributions.FULLY_REPARAMETERIZED
    with tf.variable_scope('model'):
        p_xIz_logits = Decoder(q_z.sample(), hidden_size=FLAGS.hidden_size)
        p_xIz = distributions.Bernoulli(logits=p_xIz_logits)
        posterior_predictive_samples = p_xIz.sample()
    with tf.variable_scope('model', reuse=True):
        p_z = distributions.Normal(loc=np.zeros(FLAGS.latent_dim,
                                                dtype=np.float32),
                                   scale=np.ones(FLAGS.latent_dim,
                                                 dtype=np.float32))
        p_z_sample = p_z.sample(FLAGS.n_samples)
        p_xIz_logits = Decoder(p_z_sample, hidden_size=FLAGS.hidden_size)
        prior_predictive = distributions.Bernoulli(logits=p_xIz_logits)
        prior_predictive_samples = prior_predictive.sample()
    with tf.variable_scope('model', reuse=True):
        z_input = tf.placeholder(tf.float32, [None, FLAGS.latent_dim])
        p_xIz_logits = Decoder(z_input, hidden_size=FLAGS.hidden_size)
        prior_predictive_inp = distributions.Bernoulli(logits=p_xIz_logits)
        prior_predictive_inp_sample = prior_predictive_inp.sample()
    kl = tf.reduce_sum(distributions.kl(q_z, p_z), 1)
    e_log_likelihood = tf.reduce_sum(p_xIz.log_prob(x), [1, 2, 3])
    elbo = tf.reduce_sum(e_log_likelihood - kl, 0)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(-elbo)
    init_op = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init_op)
    mnist = read_data_sets(FLAGS.data_dir)
    print('Saving images to: %s' % FLAGS.fig_dir)
    plot_elbo = []
    for i in range(FLAGS.n_episodes):
        batch_x, _ = mnist.train.next_batch(FLAGS.batch_size)
        batch_x = batch_x.reshape(FLAGS.batch_size, 28, 28, 1)
        batch_x = (batch_x > 0.5).astype(np.float32)
        sess.run(optimizer, {x: batch_x})
        batch_elbo = sess.run(elbo, {x: batch_x})
        plot_elbo.append(batch_elbo / float(FLAGS.batch_size))
        if i % 1000 == 0:
            batch_elbo = sess.run(elbo, {x: batch_x})
            print('Episode: {0:d} ELBO: {1: .3f}'.format(
                i, batch_elbo / FLAGS.batch_size))
            batch_posterior_predictive_samples, batch_prior_predictive_samples = sess.run(
                [posterior_predictive_samples, prior_predictive_samples],
                {x: batch_x})
            for k in range(FLAGS.n_samples):
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_data_%d.jpg' % (i, k))
                imsave(f_name, batch_x[k, :, :, 0])
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_posterior_%d.jpg' % (i, k))
                imsave(f_name, batch_posterior_predictive_samples[k, :, :, 0])
                f_name = os.path.join(FLAGS.fig_dir,
                                      'episode_%d_prior_%d.jpg' % (i, k))
                imsave(f_name, batch_prior_predictive_samples[k, :, :, 0])
    plt.plot(range(len(plot_elbo)), plot_elbo)
    plt.show()
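network_train wires up the usual single-sample ELBO: a Bernoulli reconstruction term summed over pixels minus the KL between the Normal posterior and the Normal prior. A minimal TensorFlow Probability sketch of the same objective (tiny made-up shapes, not the FLAGS-driven model above; runs eagerly under TF2):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

x = tf.cast(tf.random.uniform([8, 4]) > 0.5, tf.float32)             # binarized toy data
q_z = tfd.Normal(loc=0.1 * tf.ones([8, 2]), scale=tf.ones([8, 2]))   # stand-in encoder output
p_z = tfd.Normal(loc=tf.zeros([8, 2]), scale=tf.ones([8, 2]))
z = q_z.sample()
logits = tf.zeros([8, 4])                                            # stand-in decoder output for z
p_x_given_z = tfd.Bernoulli(logits=logits)
log_lik = tf.reduce_sum(p_x_given_z.log_prob(x), axis=1)             # sum over pixels
kl = tf.reduce_sum(tfd.kl_divergence(q_z, p_z), axis=1)              # sum over latent dims
elbo = tf.reduce_mean(log_lik - kl)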
Example #30
    def p_y_xz(self, x, z_stacked, TD, mode):
        # x is [bs/nbs, 2*enc_rnn_dim]
        # z_stacked is [k, bs/nbs, N*K]    (at EVAL or PREDICT time, k (=self.sample_ct) may be hps.k, K**N or sample_ct)
        # in this function, rnn decoder inputs are of the form: z + x + car1 + car2 (note: first 3 are "extras" to help with learning)
        ph = self.hps.prediction_horizon

        k, GMM_c, pred_dim = self.sample_ct, self.hps.GMM_components, self.pred_dim
        with tf.variable_scope("p_y_xz") as varscope:
            z = tf.reshape(z_stacked,
                           [-1, self.latent.z_dim])  # [k;bs/nbs, z_dim]
            zx = tf.concat([z, tf.tile(x, [k, 1])],
                           axis=1)  # [k;bs/nbs, z_dim + 2*enc_rnn_dim]

            cell = stacked_rnn_cell(self.hps.rnn_cell,
                                    self.hps.rnn_cell_kwargs,
                                    self.hps.dec_rnn_dim,
                                    self.hps.rnn_io_dropout_keep_prob, mode)
            initial_state = project_to_RNN_initial_state(cell, zx)

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                if self.hps.sample_model_during_dec and mode == tf.estimator.ModeKeys.TRAIN:
                    input_ = tf.concat(
                        [zx, tf.tile(TD["joint_present"], [k, 1])], axis=1
                    )  # [k;bs, N*K + 2*enc_rnn_dim + pred_dim+state_dim]
                    state = initial_state
                    with tf.variable_scope("rnn") as rnnscope:
                        log_pis, mus, log_sigmas, corrs = [], [], [], []
                        for j in range(ph):
                            if j > 0:
                                rnnscope.reuse_variables()
                            output, state = cell(input_, state)
                            log_pi_t, mu_t, log_sigma_t, corr_t = project_to_GMM_params(
                                output, GMM_c, pred_dim,
                                self.hps.dec_GMM_proj_MLP_dims)
                            y_t = GMM2D(log_pi_t, mu_t, log_sigma_t, corr_t,
                                        self.hps.log_sigma_min,
                                        self.hps.log_sigma_max).sample(
                                        )  # [k;bs, pred_dim]
                            mask = distributions.Bernoulli(
                                probs=self.dec_sample_model_prob,
                                dtype=tf.float32).sample(
                                    (tf.shape(y_t)[0], 1))  # maybe tf.shape
                            y_t = mask * y_t + (1 - mask) * tf.tile(
                                TD["car2_future"][:, j, :], [k, 1])
                            log_pis.append(log_pi_t)
                            mus.append(mu_t)
                            log_sigmas.append(log_sigma_t)
                            corrs.append(corr_t)
                            car_inputs = tf.concat(
                                [
                                    tf.tile(TD["car1_future"][:, j, :],
                                            [k, 1]), y_t
                                ],
                                axis=1)  # [k;bs, state_dim + pred_dim]
                            input_ = tf.concat(
                                [zx, car_inputs], axis=1
                            )  # [k;bs, N*K + 2*enc_rnn_dim + state_dim + pred_dim]
                        log_pis = tf.stack(log_pis,
                                           axis=1)  # [k;bs, ph, GMM_c]
                        mus = tf.stack(mus,
                                       axis=1)  # [k;bs, ph, GMM_c*pred_dim]
                        log_sigmas = tf.stack(
                            log_sigmas, axis=1)  # [k;bs, ph, GMM_c*pred_dim]
                        corrs = tf.stack(corrs, axis=1)  # [k;bs, ph, GMM_c]
                else:
                    zx_with_time_dim = tf.expand_dims(
                        zx, 1)  # [k;bs/nbs, 1, N*K + 2*enc_rnn_dim]
                    zx_time_tiled = tf.tile(
                        zx_with_time_dim,
                        [1, ph, 1])  # [k;bs/nbs, ph, N*K + 2*enc_rnn_dim]
                    car_inputs = tf.concat(
                        [  # [bs/nbs, ph, 2*state_dim]
                            tf.expand_dims(
                                TD["joint_present"],
                                1),  # [bs/nbs, 1, state_dim+pred_dim]
                            tf.concat(
                                [
                                    TD["car1_future"][:, :ph - 1, :],
                                    TD["car2_future"][:, :ph - 1, :]
                                ],
                                axis=2)  # [bs/nbs, ph-1, state_dim+pred_dim]
                        ],
                        axis=1)
                    inputs = tf.concat(
                        [zx_time_tiled,
                         tf.tile(car_inputs, [k, 1, 1])],
                        axis=2
                    )  # [k;bs/nbs, ph, N*K + 2*enc_rnn_dim + pred_dim + state_dim]
                    outputs, _ = tf.nn.dynamic_rnn(
                        cell,
                        inputs,
                        initial_state=
                        initial_state,  # [k;bs/nbs, ph, dec_rnn_dim]
                        time_major=False,
                        dtype=tf.float32,
                        scope="rnn")
                    with tf.variable_scope(
                            "rnn"):  # required to match PREDICT mode below
                        log_pis, mus, log_sigmas, corrs = project_to_GMM_params(
                            outputs, GMM_c, pred_dim,
                            self.hps.dec_GMM_proj_MLP_dims)

                tf.summary.histogram("GMM_log_pis", log_pis)
                tf.summary.histogram("GMM_log_sigmas", log_sigmas)
                tf.summary.histogram("GMM_corrs", corrs)

            elif mode == tf.estimator.ModeKeys.PREDICT:
                input_ = tf.concat(
                    [zx, tf.tile(TD["joint_present"], [k, 1])],
                    axis=1)  # [k;bs, N*K + 2*enc_rnn_dim + pred_dim+state_dim]
                state = initial_state
                with tf.variable_scope("rnn") as rnnscope:
                    log_pis, mus, log_sigmas, corrs, y = [], [], [], [], []
                    for j in range(ph):
                        if j > 0:
                            rnnscope.reuse_variables()
                        output, state = cell(input_, state)
                        log_pi_t, mu_t, log_sigma_t, corr_t = project_to_GMM_params(
                            output, GMM_c, pred_dim,
                            self.hps.dec_GMM_proj_MLP_dims)
                        y_t = GMM2D(log_pi_t, mu_t, log_sigma_t, corr_t,
                                    self.hps.log_sigma_min, self.hps.
                                    log_sigma_max).sample()  # [k;bs, pred_dim]
                        log_pis.append(log_pi_t)
                        mus.append(mu_t)
                        log_sigmas.append(log_sigma_t)
                        corrs.append(corr_t)
                        y.append(y_t)
                        car_inputs = tf.concat(
                            [tf.tile(TD["car1_future"][:, j, :], [k, 1]), y_t],
                            axis=1)  # [k;bs, state_dim + pred_dim]
                        input_ = tf.concat(
                            [zx, car_inputs], axis=1
                        )  # [k;bs, N*K + 2*enc_rnn_dim + state_dim + pred_dim]
                    log_pis = tf.stack(log_pis, axis=1)  # [k;bs, ph, GMM_c]
                    mus = tf.stack(mus, axis=1)  # [k;bs, ph, GMM_c*pred_dim]
                    log_sigmas = tf.stack(log_sigmas,
                                          axis=1)  # [k;bs, ph, GMM_c*pred_dim]
                    corrs = tf.stack(corrs, axis=1)  # [k;bs, ph, GMM_c]
                    car2_sampled_future = tf.reshape(
                        tf.stack(y, axis=1),
                        [k, -1, ph, pred_dim])  # [k, bs, ph, pred_dim]

            y_dist = GMM2D(
                tf.reshape(log_pis, [k, -1, ph, GMM_c]),
                tf.reshape(mus, [k, -1, ph, GMM_c * pred_dim]),
                tf.reshape(log_sigmas, [k, -1, ph, GMM_c * pred_dim]),
                tf.reshape(corrs, [k, -1, ph, GMM_c]), self.hps.log_sigma_min,
                self.hps.log_sigma_max)

            if mode == tf.estimator.ModeKeys.PREDICT:
                return y_dist, car2_sampled_future
            else:
                return y_dist
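The Bernoulli mask in the TRAIN branch implements scheduled sampling: with probability dec_sample_model_prob each row of the decoder input uses the model's own sample y_t, otherwise the ground-truth future. A NumPy sketch of that per-row mixing (the probability and tensors are made up):

import numpy as np

rng = np.random.default_rng(0)
model_sample = np.full((4, 2), 9.0)     # stand-in for the sampled y_t
ground_truth = np.zeros((4, 2))         # stand-in for TD["car2_future"][:, j, :]
mask = rng.binomial(1, 0.5, size=(4, 1)).astype(float)
y_t = mask * model_sample + (1 - mask) * ground_truth
print(y_t)                              # each row is either the model sample or the ground truth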