Exemple #1
0
 def _compareGradient(self, shape, axis, exclusive, reverse):
     """Check the gradient of `tf.cumprod` against a numerical estimate.

     The input holds the values 1..8 as float64, reshaped to `shape`. The
     two Jacobians returned by `tf.test.compute_gradient` must agree to
     within an absolute and relative tolerance of 1e-8.

     Args:
       shape: shape the eight input values are reshaped to.
       axis: axis passed to `tf.cumprod`.
       exclusive: `exclusive` flag passed to `tf.cumprod`.
       reverse: `reverse` flag passed to `tf.cumprod`.
     """
     values = np.arange(1, 9).reshape(shape).astype(np.float64)
     with self.test_session():
         inp = tf.convert_to_tensor(values)
         scanned = tf.cumprod(
             inp, axis=axis, exclusive=exclusive, reverse=reverse)
         analytic, numeric = tf.test.compute_gradient(
             inp, shape, scanned, shape, x_init_value=values, delta=1)
     self.assertAllClose(analytic, numeric, rtol=1e-8, atol=1e-8)
Exemple #2
0
def to_simplex(x):
  """Transform real vector of length `(K-1)` to a simplex of dimension `K`
  using a backward stick breaking construction.

  The transformation is applied along the last dimension, so any leading
  dimensions (e.g. a batch dimension) are treated independently and may be
  unknown at graph-construction time.

  Args:
    x: tf.Tensor.
      A 1-D or 2-D tensor. The size of its last dimension (`K-1`) must be
      statically known.

  Returns:
    tf.Tensor.
    A tensor of same shape as input but with last dimension of
    size `K`.

  Raises:
    InvalidArgumentError.
    If the input has Inf or NaN values.
    ValueError.
    If the size of the last dimension is not statically known.

  #### Notes

  x as a 3-D or higher tensor is not guaranteed to be supported.
  """
  x = tf.cast(x, dtype=tf.float32)
  dependencies = [tf.verify_tensor_all_finite(x, msg='')]
  x = control_flow_ops.with_dependencies(dependencies, x)

  # After the cast `x` is always a tf.Tensor, so the static shape can be read
  # directly; no separate code path for array-like inputs is needed.
  K_minus_one = x.get_shape().as_list()[-1]
  if K_minus_one is None:
    raise ValueError('The last dimension of x must be statically known.')

  # Offsets -log(K-1), -log(K-2), ..., -log(1); broadcast over the last axis.
  eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one), dtype=tf.float32))
  z = tf.sigmoid(eq + x)

  # A block of ones with the same leading dimensions as `z` and a trailing
  # dimension of 1. It is built from the dynamic shape so that an unknown
  # batch size does not break graph construction.
  ones = tf.ones(tf.concat([tf.shape(z)[:-1], [1]], 0))

  pil = tf.concat([z, ones], -1)
  piu = tf.concat([ones, 1.0 - z], -1)
  # Running product of the remaining stick lengths along the last axis.
  S = tf.cumprod(piu, axis=-1)
  return S * pil
Exemple #3
0
def create_tf_graph_for_simulate_paths():
    """Build a graph that turns random increments into simulated price paths.

    Seven float32 placeholders are created, in this order: S, K, dt, T,
    sigma, r and dw. The path tensor is

        S_T = S * cumprod(exp((r - sigma**2 / 2) * dt
                              + sigma * sqrt(dt) * dw), axis=1)

    `K` and `T` are created and returned but do not enter `S_T`.

    Returns:
        The tuple `(S, K, dt, T, sigma, r, dw, S_T)`.
    """
    # Created in one pass; the order matches the returned tuple.
    S, K, dt, T, sigma, r, dw = [tf.placeholder(tf.float32) for _ in range(7)]

    drift = (r - sigma ** 2 / 2) * dt
    diffusion = sigma * tf.sqrt(dt) * dw
    step_growth = tf.exp(drift + diffusion)

    # Accumulate the per-step growth factors along axis 1.
    S_T = S * tf.cumprod(step_growth, axis=1)
    return (S, K, dt, T, sigma, r, dw, S_T)
  def _compare(self, x, axis, reverse, use_gpu=False):
    """Compare `tf.cumprod` with a NumPy reference implementation.

    The reference is `np.cumprod` along `axis`; when `reverse` is set the
    input is flipped with `numpy_reverse` before the scan and the result is
    flipped back afterwards.

    Args:
      x: input array.
      axis: axis along which the cumulative product is taken.
      reverse: whether the scan runs from the end of `axis` to its start.
      use_gpu: forwarded to `self.test_session`.
    """
    expected = numpy_reverse(x, axis) if reverse else x
    expected = np.cumprod(expected, axis=axis)
    if reverse:
      expected = numpy_reverse(expected, axis)

    with self.test_session(use_gpu=use_gpu):
      # `reverse` has to be passed by keyword: the third positional parameter
      # of tf.cumprod is `exclusive`, so a positional `reverse` would request
      # an exclusive scan instead of a reversed one.
      tf_out = tf.cumprod(x, axis, reverse=reverse).eval()

    self.assertAllClose(expected, tf_out)
def at(ut, N):
    """
    Compute the allocation weighting from the updated usage vector.

    The `N` entries of `ut` are visited from least used to most used. Each
    one receives `(1 - usage)` times the product of the usages that come
    before it in that order. The values are then scattered back to their
    original positions.
    """
    # top_k returns the largest entries first, so the usage is negated to get
    # the least-used locations first; the sign is flipped back afterwards.
    negated_usage, free_list = tf.nn.top_k(-1 * ut, N)
    ascending_usage = negated_usage * -1

    # exclusive=True makes the running product start at 1 rather than at the
    # first usage value.
    usage_product = tf.cumprod(ascending_usage, exclusive=True)
    sorted_allocation = (1 - ascending_usage) * usage_product

    # Place every weight at the index its usage value originally had.
    allocation = tf.TensorArray(tf.float32, N).scatter(
        free_list, sorted_allocation)

    return allocation.pack()
Exemple #6
0
 def testInvalidAxis(self):
   """`tf.cumprod` must reject out-of-range and non-scalar axes.

   For a 2x5 input, the axes -3 and 2 are outside [-2, 2), and `[0]` is not
   a scalar. Each case must raise `InvalidArgumentError` with the matching
   message when the op is evaluated.
   """
   data = np.arange(0, 10).reshape([2, 5]).astype(np.float32)
   input_tensor = tf.convert_to_tensor(data)

   out_of_range = "Expected scan axis in the range [-2, 2)"
   cases = [
       (-3, out_of_range),
       (2, out_of_range),
       ([0], "axis must be a scalar"),
   ]

   with self.test_session(use_gpu=True):
     for bad_axis, fragment in cases:
       # Bind `fragment` as a default so the predicate does not depend on
       # the loop variable.
       with self.assertRaisesWithPredicateMatch(
           tf.errors.InvalidArgumentError,
           lambda e, fragment=fragment: fragment in str(e)):
         tf.cumprod(input_tensor, bad_axis).eval()
Exemple #7
0
  def _compare(self, x, axis, exclusive, reverse):
    """Assert that `tf.cumprod` matches the NumPy reference on the GPU session.

    The reference comes from `handle_options(np.cumprod, ...)`, called with
    the same `axis`, `exclusive` and `reverse` settings.
    """
    expected = handle_options(np.cumprod, x, axis, exclusive, reverse)
    with self.test_session(use_gpu=True):
      scan = tf.cumprod(x, axis=axis, exclusive=exclusive, reverse=reverse)
      actual = scan.eval()

    self.assertAllClose(expected, actual)
Exemple #8
0
def discounted_reduce_sum(X, discount, axis=-1):
    """Sum `X` along `axis` after weighting it by powers of `discount`.

    The weights are the (inclusive) cumulative product of `discount` along
    `axis`, so the entry at position `i` (0-based) is multiplied by
    `discount ** (i + 1)`. When `discount` equals 1.0 no weight tensor is
    built and `X` is multiplied by the constant 1.0 instead.

    Args:
        X: tensor to reduce.
        discount: discount factor.
        axis: axis along which to weight and sum (default: last axis).

    Returns:
        The weighted sum of `X` along `axis`.
    """
    weights = (
        tf.cumprod(discount * tf.ones_like(X), axis=axis)
        if discount != 1.0
        else 1.0
    )
    return tf.reduce_sum(X * weights, axis=axis)
Exemple #9
0
def SB_Conv2d(inp,
              ksize,
              S=128,
              padding='SAME',
              strides=[1, 1, 1, 1],
              bias=True,
              train=True,
              reuse=False,
              sbp=False,
              temp_bern=0.5,
              temp_cat=0.5,
              activation='lwta',
              name='conv'):
    """
    Convolutional layer for the SB-LWTA model, incorporating local competition.

    Parameters:
        inp: 4d tensor
            The input to the current layer.
        ksize: sequence of 5 ints
            The size of the kernels. The first three entries are used as the
            leading dimensions of the convolution filter; the last 2 entries
            denote the number of blocks (K) and the units therein (U), so the
            filter has K * U output channels.
        S: int
            Not used by this function.
        padding: str
            The padding for the conv operation. Default: SAME. (see tf conv documentation).
        strides: list of 4 ints
            The strides for the conv operation. Default: [1,1,1,1] (see tf conv).
        bias: boolean
            Flag denoting the use of bias.
        train: boolean
            Flag to alternate between train or not branches.
        reuse: boolean
            Flag to reuse or not the variables of the layer.
        sbp: boolean
            Flag to enable or disable the stick breaking process
        temp_bern: float
            The temperature for the bernoulli relaxation
        temp_cat: float
            The temperature for the categorical relaxation
        activation: String
            Select the activation function for the current layer. One of
            'lwta', 'relu' or 'maxout'.
        name: str
            The name of the current layer. The variable scope used is
            name + '_' + activation.

    Returns:
        out: tensor
            The output of the layer after the masked convolution operation, the addition of bias (if bias==True)
            and the selected activation.
        mW: 4d tensor
            The mean of the weights. Used to load values when calling the compression script
        masked_mw: 4d tensor
            The mean of the weights of the convolutional kernel masked with a sample from the IBP (if active).
            Used for calculating the compression ability of the implementation.
        masked_sw: 4d tensor
            The variance (squared standard deviation) of the weights of the convolutional kernel masked with
            a sample from the IBP (if active).
            Used for calculating the compression ability of the implementation.
        z: tensor or float
            The mask sample tiled U times when sbp is enabled, otherwise the constant 1.

    Raises:
        ValueError: if activation is not one of 'lwta', 'relu', 'maxout'.
    """

    # Fail before any variable or op is created. Previously an unknown
    # activation was only printed and the function then crashed at the
    # return statement because `out` was never assigned.
    if activation not in ('lwta', 'relu', 'maxout'):
        raise ValueError(
            'Activation: {} not implemented.'.format(activation))

    K = ksize[-2]
    U = ksize[-1]
    # threshold on the activation probability below which a block counts as
    # inactive
    tau = 1e-2

    name = name + '_' + activation
    with tf.variable_scope(name, reuse=reuse):

        # variables for the weights
        mW = tf.get_variable(
            'mW', [ksize[0], ksize[1], ksize[2], K * U],
            initializer=tf.contrib.layers.xavier_initializer(),
            dtype=tf.float32)

        # raw (pre-softplus) standard deviation; the constraint only bounds
        # it from below (the upper bound passed to clip is the value itself)
        sW = tf.get_variable('sW', [ksize[0], ksize[1], ksize[2], K * U],
                             initializer=tf.constant_initializer(-5.),
                             constraint=lambda x: tf.clip_by_value(x, -7., x),
                             dtype=tf.float32)
        sW = tf.nn.softplus(sW)

        # variables and construction for the stick breaking process
        if sbp:

            # posterior concentrations for the Kumaraswamy distribution
            conc1 = variable_on_cpu(
                'sb_t_u_1', [K],
                initializer=tf.constant_initializer(3.),
                constraint=lambda x: tf.clip_by_value(x, -6., x),
                dtype=tf.float32)

            conc0 = variable_on_cpu(
                'sb_t_u_2', [K],
                initializer=tf.constant_initializer(1.),
                constraint=lambda x: tf.clip_by_value(x, -6., x),
                dtype=tf.float32)
            conc1 = tf.nn.softplus(conc1)
            conc0 = tf.nn.softplus(conc0)

            # stick breaking construction
            q_u = kumaraswamy_sample(
                conc1, conc0, sample_shape=[inp.get_shape()[1].value, K])
            # NOTE(review): cumprod runs along axis 0 (the default), i.e. the
            # first entry of sample_shape rather than K -- confirm this is the
            # intended stick-breaking axis.
            pi = tf.cumprod(q_u)

            # posterior bernoulli (relaxed) probabilities
            t_pi = tf.get_variable(
                'sb_t_pi', [K],
                initializer=tf.initializers.random_uniform(-5., 1.),
                constraint=lambda x: tf.clip_by_value(x, -7., 600.),
                dtype=tf.float32)
            t_pi = tf.nn.sigmoid(t_pi)

        biases = 0.
        if bias:
            biases = variable_on_cpu('bias', [K * U],
                                     tf.constant_initializer(0.0))

        z = 1.
        # train branch
        if train:

            # reparametrizable normal sample
            eps = tf.stop_gradient(tf.random_normal(mW.get_shape()))
            W = mW + eps * sW

            # stick breaking kl and operations
            if sbp:

                z_sample = bin_concrete_sample(t_pi, temp_bern)
                # one mask value per block, repeated for the U units
                z = tf.tile(z_sample, [U])
                W *= z

                kl_sticks = tf.reduce_sum(
                    kumaraswamy_kl(tf.ones_like(conc1), tf.ones_like(conc0),
                                   conc1, conc0, q_u))
                kl_z = tf.reduce_sum(
                    bin_concrete_kl(pi, t_pi, temp_bern, z_sample))

                tf.add_to_collection('kl_loss', kl_sticks)
                tf.add_to_collection('kl_loss', kl_z)

                tf.summary.scalar('kl_sticks', kl_sticks)
                tf.summary.scalar('kl_z', kl_z)

                # if probability of activation is smaller than tau, it's inactive
                tf.summary.scalar(
                    'sparsity',
                    tf.reduce_sum(
                        tf.cast(tf.greater(t_pi /
                                           (1. + t_pi), tau), tf.float32)) * U)

            # add the kl terms to the collection
            kl_weights = tf.reduce_sum(
                normal_kl(tf.zeros_like(mW), tf.ones_like(sW), mW, sW, W))
            tf.add_to_collection('losses', kl_weights)
            tf.summary.scalar('kl_weights', kl_weights)

            # convolution operation
            lam = tf.nn.conv2d(inp, W, strides=strides,
                               padding=padding) + biases

            # choose activation based on input
            if activation == 'lwta':

                assert U > 1, 'The number of competing units should be larger than 1'

                # split the channel axis into K blocks of U competing units
                lam_re = tf.reshape(
                    lam, [-1, lam.get_shape()[1],
                          lam.get_shape()[2], K, U])

                # softmax over the units of each block; the small constant
                # keeps the probabilities away from 0 before renormalizing
                prbs = tf.nn.softmax(lam_re) + 1e-5
                prbs /= tf.reduce_sum(prbs, -1, keepdims=True)

                # draw relaxed sample and apply activation
                xi = concrete_sample(prbs, temp_cat)
                out = lam_re * xi
                out = tf.reshape(out, tf.shape(lam))

                # add the relative kl terms
                kl_xi = tf.reduce_mean(
                    tf.reduce_sum(
                        concrete_kl(tf.ones_like(lam_re) / U, prbs, xi), [1]))

                tf.add_to_collection('kl_loss', kl_xi)
                tf.summary.scalar('kl_xi', kl_xi)

            elif activation == 'relu':
                # apply relu
                out = tf.nn.relu(lam)

            else:  # activation == 'maxout' (validated at the top)
                # apply maxout activation: maximum over the U units per block
                lam_re = tf.reshape(
                    lam, [-1, lam.get_shape()[1],
                          lam.get_shape()[2], K, U])
                out = tf.reduce_max(lam_re, -1, keepdims=False)

        # test branch, same with train but replace samples with means
        else:
            re = tf.ones_like(mW)
            z = 1.

            # if sbp is active calculate mask and draw samples
            if sbp:
                mask = tf.cast(tf.greater(t_pi, tau), tf.float32)
                z = Bernoulli(probs=mask * t_pi,
                              name="q_z_test",
                              dtype=tf.float32).sample()
                z = tf.tile(z, [U])
                # the convolution uses the expected mask, not the sample
                re = tf.tile(mask * t_pi, [U])

            # convolution operation
            lam = tf.nn.conv2d(inp, re * mW, strides=strides,
                               padding=padding) + biases

            if activation == 'lwta':
                # calculate probabilities of activation
                lam_re = tf.reshape(
                    lam, [-1, lam.get_shape()[1],
                          lam.get_shape()[2], K, U])
                prbs = tf.nn.softmax(lam_re) + 1e-5
                prbs /= tf.reduce_sum(prbs, -1, keepdims=True)

                # draw sample for activated units (fixed low temperature)
                out = lam_re * concrete_sample(prbs, 0.01)
                out = tf.reshape(out, tf.shape(lam))

            elif activation == 'relu':
                # apply relu
                out = tf.nn.relu(lam)

            else:  # activation == 'maxout' (validated at the top)
                # apply maxout operation
                lam_re = tf.reshape(
                    lam, [-1, lam.get_shape()[1],
                          lam.get_shape()[2], K, U])
                out = tf.reduce_max(lam_re, -1)

    return out, mW, z * mW, z * sW**2, z
Exemple #10
0
def ngrams(strings, ngram_range):
    """Create a tensor of n-grams.

    Given a vector of strings, return a sparse matrix containing the ngrams
    from each string.  Each row in the output sparse tensor contains the set
    of ngrams from the corresponding element in the input tensor.

    The output ngrams include all whitespace and punctuation from the
    original strings.

    Example:

    strings = ['ab: c', 'wxy.']
    ngrams_range = (1,3)

    output is a sparse tensor with

    indices = [[0, 0], [0, 1], ..., [0, 11], [1, 0], [1, 1], ..., [1, 8]]
    values = ['a', 'ab', 'ab:', 'b', 'b:', 'b: ', ':', ': ', ': c', ' ', ' c',
              'c', 'w', 'wx', 'wxy', 'x', 'xy', 'xy.', 'y', 'y.', '.']
    dense_shape = [2, 12]

    Args:
      strings: A tensor of strings with size [batch_size,].
      ngram_range: A pair with the range (inclusive) of ngram sizes to return.

    Returns:
      A SparseTensor containing all ngrams from each element of the input.

    Raises:
      ValueError: if ngram_range[0] < 1 or ngram_range[1] < ngram_range[0]
    """
    # Validate first so that an invalid range does not leave ops in the graph.
    if ngram_range[0] < 1 or ngram_range[1] < ngram_range[0]:
        raise ValueError('Invalid ngram_range: %r' % (ngram_range, ))

    # This function is implemented as follows.  First we split the input.  If
    # the input is ['abcd', 'q', 'xyz'] then the split operation returns a
    # SparseTensor with
    #
    # indices=[[0, 0], [0, 1], [0, 2], [0, 3], [1, 0], [2, 0], [2, 1], [2, 2]]
    # values=['a', 'b', 'c', 'd', 'q', 'x', 'y', 'z']
    # dense_shape=[3, 4]
    #
    # We then create shifts of the values and first column of indices,
    # buffering to avoid overrunning the end of the array, so the shifted
    # values (if we are creating ngrams up to size 3) are
    #
    # shifted_batch_indices[0]=[0, 0, 0, 0, 1, 2, 2, 2]
    # shifted_chars[0]=['a', 'b', 'c', 'd', 'q', 'x', 'y', 'z']
    #
    # shifted_batch_indices[1]=[0, 0, 0, 1, 2, 2, 2, -1]
    # shifted_chars[1]=['b', 'c', 'd', 'q', 'x', 'y', 'z', '']
    #
    # shifted_batch_indices[2]=[0, 0, 1, 2, 2, 2, -1, -1]
    # shifted_chars[2]=['c', 'd', 'q', 'x', 'y', 'z', '', '']
    #
    # These shifted values are used to create the ngrams as follows.  We use
    # tf.string_join to join shifted_chars[:k] to create k-grams.  The batch
    # that the first of these belonged to is given by
    # shifted_batch_indices[0].  However some of these will cross the
    # boundaries between 'batches' and so we create a boolean mask which is
    # True when shifted_batch_indices[:k] are all equal.
    #
    # This results in tensors of ngrams, their batch indices and a boolean
    # mask, which we then use to construct the output SparseTensor.
    chars = tf.string_split(strings, delimiter='')
    char_batch_indices = chars.indices[:, 0]
    num_shifts = ngram_range[1] + 1

    def _sliding_windows(values, num_shifts, fill_value):
        # Pad the end with `fill_value` so that every shifted slice has the
        # same length as `values`.
        buffered_values = tf.concat(
            [values, tf.fill([num_shifts - 1], fill_value)], 0)
        return [
            tf.slice(buffered_values, [i], tf.shape(values))
            for i in range(num_shifts)
        ]

    shifted_batch_indices = _sliding_windows(char_batch_indices,
                                             num_shifts,
                                             tf.constant(-1, dtype=tf.int64))
    shifted_chars = _sliding_windows(chars.values, num_shifts, '')

    # Construct a tensor of the form
    # [['a', 'ab', 'abc'], ['b', 'bc', 'bcd'], ...]
    # k >= 1 is guaranteed by the validation above, so every slice passed to
    # tf.string_join is non-empty.
    ngrams_array = [
        tf.string_join(shifted_chars[:k])
        for k in range(ngram_range[0], ngram_range[1] + 1)
    ]
    ngrams_tensor = tf.stack(ngrams_array, 1)

    # Construct a boolean mask for whether each ngram in ngrams_tensor is
    # valid, in that each character came from the same batch.  The cumulative
    # product along axis 1 stays 1 only while all shifts so far match the
    # batch index of the first character.
    same_batch = tf.equal(tf.stack(shifted_batch_indices, 1),
                          tf.expand_dims(shifted_batch_indices[0], 1))
    valid_ngram = tf.equal(tf.cumprod(tf.to_int32(same_batch), axis=1), 1)
    valid_ngram = valid_ngram[:, (ngram_range[0] - 1):ngram_range[1]]

    # Construct a tensor with the batch that each ngram in ngrams_tensor
    # belongs to.
    batch_indices = tf.tile(tf.expand_dims(char_batch_indices, 1),
                            [1, ngram_range[1] + 1 - ngram_range[0]])

    # Apply the boolean mask and construct a SparseTensor with the given
    # indices and values, where another index is added to give the position
    # within a batch.
    batch_indices = tf.boolean_mask(batch_indices, valid_ngram)
    ngrams_tensor = tf.boolean_mask(ngrams_tensor, valid_ngram)
    instance_indices = segment_indices(batch_indices)
    return tf.SparseTensor(
        tf.stack([batch_indices, instance_indices], 1), ngrams_tensor,
        tf.stack([
            tf.size(strings, out_type=tf.int64),
            tf.reduce_max(instance_indices) + 1
        ], 0))
Exemple #11
0
    def _compare(self, x, axis, exclusive, reverse, use_gpu=False):
        """Assert that `tf.cumprod` agrees with the NumPy reference.

        The reference comes from `handle_options(np.cumprod, ...)`, called
        with the same `axis`, `exclusive` and `reverse` settings. `use_gpu`
        is forwarded to the test session.
        """
        expected = handle_options(np.cumprod, x, axis, exclusive, reverse)
        with self.test_session(use_gpu=use_gpu):
            scan = tf.cumprod(
                x, axis=axis, exclusive=exclusive, reverse=reverse)
            actual = scan.eval()

        self.assertAllClose(expected, actual)
Exemple #12
0
    def _build(self, batch_size, horizon, behavioral, per_decision, normalize=False, truncate_at=np.inf):
        """(Re)build the performance-estimation graph and its evaluators.

        Nothing is done when the requested configuration equals the one the
        graph was last built for (`self._setting`). Otherwise the discounted
        returns, the (optionally importance-weighted) estimate of the
        expected return and its variance, their gradients, and a set of
        `U.function` evaluators are created and stored on `self`.

        Args:
            batch_size: number of episodes `self.rew` is split into.
            horizon: episode length; the mask placeholder has
                `batch_size * horizon` rows.
            behavioral: policy that generated the data, or None for the
                on-policy case (no importance weighting).
            per_decision: use per-decision instead of per-trajectory
                importance weights.
            normalize: self-normalize the importance weights instead of
                truncating them.
            truncate_at: upper clipping value for unnormalized weights.
        """
        setting = [batch_size, horizon, behavioral, per_decision, normalize,
                   truncate_at]
        if setting == self._setting:
            # The graph for this configuration already exists.
            return
        self._setting = setting

        self.mask = tf.placeholder(name="mask", dtype=tf.float32, shape=[batch_size*horizon, 1])
        # One row per episode.
        rews_by_episode = tf.stack(tf.split(self.rew, batch_size))
        # Broadcast gamma to the reward shape; the exclusive cumulative
        # product then yields gamma**0, gamma**1, ... along each episode.
        disc = self.gamma + 0*rews_by_episode
        disc = tf.cumprod(disc, axis=1, exclusive=True)
        disc_rews = rews_by_episode * disc
        rets = tf.reduce_sum(disc_rews, axis=1)

        # Return statistics depend only on the discounted rewards, so they are
        # computed for every configuration (they used to be defined in the
        # off-policy branch only, which made the on-policy path fail).
        avg_ret, var_ret = tf.nn.moments(rets, axes=[0])
        max_ret = tf.reduce_max(rets)

        if behavioral is None:
            # On policy
            avg_J, var_J = avg_ret, var_ret
            grad_avg_J = tf.constant(0)
            grad_var_J = tf.constant(0)
            avg_iw = tf.constant(1)
            var_iw = tf.constant(0)
            max_iw = tf.constant(1)
            # NOTE(review): this is a plain Python number while every other
            # output is a tensor -- confirm U.function accepts it.
            ess = batch_size
        else:
            # Off policy -> importance weighting
            log_ratios = self.logprobs - behavioral.pd.logp(self.ac_in)
            log_ratios = tf.expand_dims(log_ratios, axis=1)
            log_ratios = tf.multiply(log_ratios, self.mask)
            log_ratios_by_episode = tf.stack(tf.split(log_ratios, batch_size))

            if per_decision:
                # Per-decision: the weight at step t is the product of the
                # ratios up to and including t.
                iw = tf.exp(tf.cumsum(log_ratios_by_episode, axis=1))
                if not normalize:
                    # Per-decision, unnormalized (possibly truncated)
                    iw = tf.clip_by_value(iw, 0, truncate_at)
                    weighted_rets = tf.reduce_sum(tf.multiply(disc_rews, iw), axis=1)
                    avg_J, var_J = tf.nn.moments(weighted_rets, axes=[0])
                else:
                    # Per-decision, self-normalized
                    iw = batch_size*iw/tf.reduce_sum(iw, axis=0)
                    avg_J_t = tf.reduce_mean(disc_rews * iw, axis=0)
                    avg_J = tf.reduce_sum(avg_J_t)
                    # To be checked (flagged by the original author).
                    var_J = 1./batch_size * tf.reduce_sum(
                        disc**2 * tf.reduce_mean(
                            iw**2 * (rews_by_episode - avg_J_t)**2, axis=0))
                    weighted_rets = tf.reduce_sum(tf.multiply(disc_rews, iw), axis=1)
                # Effective per-episode weight: weighted over plain return.
                eff_iw = weighted_rets/rets
                avg_iw, var_iw = tf.nn.moments(eff_iw, axes=[0])
                max_iw = tf.reduce_max(eff_iw)
                # `ess` was never defined on this path, which made the
                # evaluators below fail.
                # NOTE(review): it is derived from the effective weights to
                # mirror avg_iw/var_iw/max_iw -- confirm this definition.
                ess = tf.round(tf.reduce_sum(eff_iw)**2 / tf.reduce_sum(eff_iw**2))
            else:
                # Per-trajectory: one weight per episode.
                iw = tf.exp(tf.reduce_sum(log_ratios_by_episode, axis=1))
                if not normalize:
                    # Per trajectory, unnormalized (possibly truncated)
                    iw = tf.clip_by_value(iw, 0, truncate_at)
                    weighted_rets = tf.multiply(rets, iw)
                    avg_J, var_J = tf.nn.moments(weighted_rets, axes=[0])
                    avg_iw, var_iw = tf.nn.moments(iw, axes=[0])
                else:
                    # Per-trajectory, self-normalized
                    iw = batch_size*iw/tf.reduce_sum(iw, axis=0)
                    avg_J = tf.reduce_mean(rets*iw, axis=0)
                    var_J = 1./batch_size * tf.reduce_mean(iw**2 *
                                                           (rets - avg_J)**2)
                    avg_iw = tf.reduce_mean(iw, axis=0)
                    var_iw = 1./batch_size * tf.reduce_mean((iw - 1)**2)

                ess = tf.round(tf.reduce_sum(iw)**2 / tf.reduce_sum(iw**2))
                max_iw = tf.reduce_max(iw)

            grad_avg_J = U.flatgrad(avg_J, self.get_param())
            grad_var_J = U.flatgrad(var_J, self.get_param())

        def _compile(outputs):
            # All evaluators share the same inputs; a fresh list is passed on
            # every call, as before.
            return U.function(
                [self.ob, self.ac_in, self.rew, self.gamma, self.mask],
                outputs)

        self._avg_J = avg_J
        self._var_J = var_J
        self._grad_avg_J = grad_avg_J
        self._grad_var_J = grad_var_J
        self._get_avg_J = _compile([avg_J])
        self._get_var_J = _compile([var_J])
        self._get_grad_J = _compile([grad_avg_J])
        self._get_grad_var_J = _compile([grad_var_J])
        self._get_all = _compile([avg_J, var_J, grad_avg_J, grad_var_J])
        self._get_ess = _compile([ess])
        self._get_iw_stats = _compile([avg_iw, var_iw, max_iw, ess])
        self._get_ret_stats = _compile([avg_ret, var_ret, max_ret])
    def __init__(self, sess, rnn_size, layer_size, decoder_vocab_size, embedding_dim, k, lr):
        """Build the decoder graph, the reward tensors and the training op.

        Args:
            sess: session object; stored as `self.sess`.
            rnn_size: forwarded to `self._get_simple_lstm` and to the decoder;
                also the width of the `rnn_state` placeholder.
            layer_size: forwarded to `self._get_simple_lstm`.
            decoder_vocab_size: size of the output vocabulary; sets the width
                of the interest/history/memory placeholders, the embedding
                table and the output layer.
            embedding_dim: width of the decoder embedding table.
            k: stored as `self._k` and passed to the embedding helper.
            lr: learning rate handed to the Adam optimizer.
        """

        self.sess = sess
        self._k = k
        self.lr = lr
        # Reward scales used when the per-step rewards are built below.
        self.postive_imediate_reward = 1.0

        self.negative_imediate_reward = 0.2

        # Decay factor of the reward accumulation and of `gt_ratio`.
        self.account_ratio = 0.9

        self.rnn_size = rnn_size
        
        
        self.interesting = tf.placeholder(tf.float32, shape=[None, decoder_vocab_size], name='interest')
        self.history_masking = tf.placeholder(tf.float32, shape=[None, decoder_vocab_size], name='history')

        decoder_cell = self._get_simple_lstm(rnn_size, layer_size)

        self.rnn_init_state = tf.placeholder(tf.float32, [1, rnn_size], name='rnn_state')


        decoder_embedding = tf.Variable(tf.truncated_normal(shape=[decoder_vocab_size, embedding_dim], stddev=0.1),
                                            name='decoder_embedding')


        self.start_tokens = tf.placeholder(tf.int32, shape=[None], name='start_tokens')

        self.start_hit = tf.placeholder(tf.float32, shape=[None], name='start_hit')

        self.mem = tf.placeholder(tf.float32, shape=[None, decoder_vocab_size], name='mem')

        self.sequence_length = tf.placeholder(tf.int32, shape=[None], name='seq_length')

        helper = InteractiveGreedyEmbeddingHelper(decoder_embedding, self._k, self.start_tokens, self.start_hit,
                                                  decoder_vocab_size, self.sequence_length)

        with tf.variable_scope('decoder'):
            # Softmax output layer, so `rnn_output` below holds probabilities.
            fc_layer = Dense(decoder_vocab_size, activation=tf.nn.softmax)

            decoder = ExternalMemInteractiveDecoder(decoder_cell, helper, self.rnn_init_state,
                                                    self.history_masking, self.interesting, self.mem, self.rnn_size,
                                                    fc_layer)

        self.logits, self.final_state, self.final_history_masking, self.hit, self.final_sequence_lengths = \
            dynamic_interactive_decode(decoder)

        self.hit = self.hit.hit

        # Reverse every hit sequence along axis 1 within its own length.
        reverse_hit = tf.reverse_sequence(self.hit, self.sequence_length, seq_dim=1)

        # Positive entries become hit * positive reward; all other entries
        # become (hit - 1) * negative reward.
        self.reverse_imediate_reward = tf.where(reverse_hit > 0,
                                                reverse_hit * self.postive_imediate_reward,
                                                (reverse_hit - 1) * self.negative_imediate_reward)

        # Undo the reversal to get the rewards in the original step order.
        self.imediate_reward = tf.reverse_sequence(self.reverse_imediate_reward, self.sequence_length, seq_dim=1)

        initial_time = tf.constant(0, dtype=tf.int32)

        # Scalar zero derived from the reward tensor.
        initial_pre_reward = self.reverse_imediate_reward[0, 0] * 0.0

        output_ta = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True)

        def cond(time, pre_reward, output_ta_l):
            # Continue while `time` is below every entry of sequence_length.
            return tf.reduce_all(time < self.sequence_length)

        def body(time, pre_reward, output_ta_l):
            # Running decayed sum over the reversed rewards.
            # NOTE(review): only row 0 of the reward tensor is read --
            # presumably the batch size is 1; confirm against callers.
            pre_reward = self.reverse_imediate_reward[0, time] + self.account_ratio * pre_reward

            output_ta_l = output_ta_l.write(time, pre_reward)
            return time + 1, pre_reward, output_ta_l

        res = tf.while_loop(cond, body, loop_vars=[initial_time, initial_pre_reward, output_ta])

        # Stack the accumulated values into a single row and reverse it back
        # to the original step order; gradients are blocked.
        self.cumsum_reward = tf.reverse_sequence([res[-1].stack()], self.sequence_length, seq_dim=1)
        self.cumsum_reward = tf.stop_gradient(self.cumsum_reward)

        self.rnn_output = self.logits.rnn_output

        self.sample_ids = self.logits.sample_id

        self.onehot_sample = tf.one_hot(self.sample_ids, depth=decoder_vocab_size, axis=-1)

        self.target = tf.placeholder(tf.int32, shape=[None, None], name='target')

        self.onehot_target = tf.one_hot(self.target, depth=decoder_vocab_size, axis=-1)

        # `cumsum_reward * 0 + 1` is a tensor of ones with the same shape;
        # the cumulative product along axis 1 yields account_ratio**1,
        # account_ratio**2, ...
        self.gt_ratio = tf.cumprod((self.cumsum_reward * 0 + 1) * self.account_ratio, axis=1)
        self.gt_ratio = tf.stop_gradient(self.gt_ratio)
        # Integer switch: > 0 selects the reinforce loss, otherwise the
        # supervised loss (see tf.cond below).
        self.is_reinforce = tf.placeholder(tf.int32, shape=[], name='isReinfoce')

        # Log-probability of the sampled ids, weighted by the accumulated
        # reward and the decay ratio.
        # NOTE(review): the reshaped [-1, vocab] tensors are multiplied with
        # cumsum_reward / gt_ratio, whose last axis is time -- confirm the
        # broadcast is the intended one.
        self.reinforce_cross_entropy = tf.reduce_mean(-tf.reduce_sum(tf.log(1e-8 +  tf.reshape(self.rnn_output,
                                                                                       [-1, decoder_vocab_size])) *
                                                                     tf.reshape(self.onehot_sample, [-1, decoder_vocab_size]) *
                                                                     self.cumsum_reward * self.gt_ratio,
                                                                     axis=-1), name='reinfolearn')

        # NOTE(review): unlike the reinforce loss, reduce_sum has no axis here
        # (the `name` argument belongs to reduce_sum), so it sums over all
        # elements and the outer reduce_mean acts on a scalar -- confirm.
        self.supervised_cross_entropy = tf.reduce_mean(-tf.reduce_sum(
            tf.log(1e-8 + tf.reshape(self.rnn_output, [-1, decoder_vocab_size])) *
            tf.reshape(self.onehot_target, [-1, decoder_vocab_size]), name='mem_suplearn'))

        self.cost = tf.cond(self.is_reinforce > 0, lambda: self.reinforce_cross_entropy,
                            lambda: self.supervised_cross_entropy)

        self.train_opt = tf.train.AdamOptimizer(self.lr, epsilon=1e-4)

        # Clip every gradient element to [-10, 10]; variables without a
        # gradient are skipped.
        gradients = self.train_opt.compute_gradients(self.cost)
        capped_gradients = [(tf.clip_by_value(grad, -10., 10.), var) for grad, var in gradients if grad is not None]
        self.train_op = self.train_opt.apply_gradients(capped_gradients)
Exemple #14
0
def ngrams(tokens, ngram_range, separator, name=None):
  """Build a `SparseTensor` holding every n-gram of each row of `tokens`.

  For each row of the input, all runs of consecutive tokens whose length lies
  in `ngram_range` (both ends inclusive) are joined with `separator` and
  emitted. Use " " as the separator for word tokens and "" for character
  tokens.

  Example:

  With `tokens` being a `SparseTensor` such that

  indices = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2], [1, 3]]
  values = ['One', 'was', 'Johnny', 'Two', 'was', 'a', 'rat']
  dense_shape = [2, 4]

  and

  ngram_range = (1, 3)
  separator = ' '

  the result is a `SparseTensor` with

  indices = [[0, 0], [0, 1], [0, 2], ..., [1, 6], [1, 7], [1, 8]]
  values = ['One', 'One was', 'One was Johnny', 'was', 'was Johnny', 'Johnny',
            'Two', 'Two was', 'Two was a', 'was', 'was a', 'was a rat', 'a',
            'a rat', 'rat']
  dense_shape = [2, 9]

  Args:
    tokens: a two-dimensional `SparseTensor` of dtype `tf.string` holding the
      tokens from which n-grams are built.
    ngram_range: a pair `(min_n, max_n)` giving the inclusive range of n-gram
      sizes to produce.
    separator: string placed between adjacent tokens inside an n-gram.
    name: (Optional) A name for this operation.

  Returns:
    A `SparseTensor` containing all n-grams of every input row.

  Raises:
    ValueError: if ngram_range[0] < 1 or ngram_range[1] < ngram_range[0]
  """
  # Outline of the approach, starting from the `SparseTensor`
  #
  # indices=[[0, 0], [0, 1], [0, 2], [0, 3], [1, 0], [2, 0], [2, 1], [2, 2]]
  # values=['a', 'b', 'c', 'd', 'q', 'x', 'y', 'z']
  # dense_shape=[3, 4]
  #
  # The values and the row (first) column of the indices are each copied
  # several times, every copy shifted one step further left and padded at the
  # end so all copies keep the original length. For n-grams up to size 3:
  #
  # shifted_rows[0]=[0, 0, 0, 0, 1, 2, 2, 2]
  # shifted_values[0]=['a', 'b', 'c', 'd', 'q', 'x', 'y', 'z']
  #
  # shifted_rows[1]=[0, 0, 0, 1, 2, 2, 2, -1]
  # shifted_values[1]=['b', 'c', 'd', 'q', 'x', 'y', 'z', '']
  #
  # shifted_rows[2]=[0, 0, 1, 2, 2, 2, -1, -1]
  # shifted_values[2]=['c', 'd', 'q', 'x', 'y', 'z', '', '']
  #
  # Joining shifted_values[:k] element-wise with `separator` yields a k-gram
  # candidate starting at every token. A candidate belongs to the row given by
  # shifted_rows[0], but it is only genuine when it does not run past the end
  # of that row, i.e. when shifted_rows[:k] all agree at that position. That
  # agreement is the boolean mask used to filter the candidates before the
  # output `SparseTensor` is assembled.
  with tf.name_scope(name, 'ngrams'):
    smallest = ngram_range[0]
    if smallest < 1 or ngram_range[1] < smallest:
      raise ValueError('Invalid ngram_range: %r' % (ngram_range,))
    largest = ngram_range[1]
    num_shifts = largest + 1

    def _shifted_copies(values, count, pad):
      # Pad the tail once, then take `count` equally long windows from it.
      padding = tf.fill([count - 1], pad)
      padded = tf.concat([values, padding], 0)
      copies = []
      for offset in range(count):
        copies.append(tf.slice(padded, [offset], tf.shape(values)))
      return copies

    row_ids = tokens.indices[:, 0]
    pad_row = tf.constant(-1, dtype=tf.int64)
    shifted_rows = _shifted_copies(row_ids, num_shifts, pad_row)
    shifted_values = _shifted_copies(tokens.values, num_shifts, '')

    # With separator '' the candidates look like
    # [['a', 'ab', 'abc'], ['b', 'bc', 'bcd'], ...]
    def _join(parts):
      return tf.string_join(parts, separator=separator) if parts else None

    ngram_columns = []
    for size in range(smallest, largest + 1):
      ngram_columns.append(_join(shifted_values[:size]))
    ngrams_tensor = tf.stack(ngram_columns, 1)

    # Mask of genuine n-grams: the running product stays 1 only while every
    # token so far came from the same row as the first token.
    stacked_rows = tf.stack(shifted_rows, 1)
    anchor_rows = tf.expand_dims(shifted_rows[0], 1)
    same_row = tf.to_int32(tf.equal(stacked_rows, anchor_rows))
    unbroken = tf.equal(tf.cumprod(same_row, axis=1), 1)
    unbroken = unbroken[:, (smallest - 1):largest]

    # Row id of every candidate, laid out like `ngrams_tensor`.
    first_col = tf.expand_dims(tokens.indices[:, 0], 1)
    tiled_rows = tf.tile(first_col, [1, largest + 1 - smallest])

    # Drop the invalid candidates, then number the survivors within each row
    # to obtain the second index of the output.
    kept_rows = tf.boolean_mask(tiled_rows, unbroken)
    kept_ngrams = tf.boolean_mask(ngrams_tensor, unbroken)
    positions = segment_indices(kept_rows)
    # Clamp at -1 so that the computed second dimension is never negative.
    num_columns = tf.maximum(tf.reduce_max(positions), -1) + 1
    sparse_indices = tf.stack([kept_rows, positions], 1)
    sparse_shape = tf.stack([tokens.dense_shape[0], num_columns])
    return tf.SparseTensor(
        indices=sparse_indices,
        values=kept_ngrams,
        dense_shape=sparse_shape)
    def build_Q_expansion_graph(self,
                                obs,
                                first_rewards,
                                first_done,
                                worldmodel,
                                rollout_len=1,
                                model_ensembling=False):
        """Build the graph that computes model-based value-expansion targets.

        Rolls the policy forward through `worldmodel` for `rollout_len` steps
        starting at `obs`, then forms one candidate target for every
        (source frame, target frame) pair of the rollout by combining
        discounted, termination-weighted rewards with a bootstrapped
        `self.old_Q` value. The candidates are summarised by their mean and by
        a confidence weight (inverse variance, or a solve against the
        covariance matrix).

        Args:
            obs: starting observations. Tiled to
                `[batch, transition_sample_n, self.obs_dim]` when
                `model_ensembling` is set, so presumably
                `[batch, self.obs_dim]` on entry -- TODO confirm.
            first_rewards: rewards placed at frame 0 of the rollout, ahead of
                the model-predicted rewards; expanded twice and tiled to
                `[batch, transition_sample_n, reward_sample_n]`.
            first_done: done value for the starting frame; tiled to
                `[batch, transition_sample_n]` when `model_ensembling` is set.
            worldmodel: object providing `get_ensemble_idx_info()`,
                `init_extra_info()`, `transition()` and `get_rewards()`.
            rollout_len: number of model steps to unroll; the rollout then has
                `rollout_len + 1` frames.
            model_ensembling: if true, `ensemble_idxs` is passed to
                `worldmodel.transition`; otherwise the transition outputs are
                averaged (`next_obs` over axis -2, `next_dones` over axis -1).

        Returns:
            A tuple `(target_means, target_confidence, Q_guesses,
            reached_this_point_to_guess_prob)`. `target_means` and
            `target_confidence` are indexed
            `[batch, source frame, target frame]`; the confidences are zeroed
            below the diagonal and normalised to sum to 1 over the target
            axis. The last two entries are `None` unless
            `self.value_expansion["tdk_trick"]` is truthy.
        """
        ### this sets up the machinery for having multiple parallel rollouts, each of which has a single consistent transition
        ensemble_idxs, transition_sample_n, reward_sample_n = worldmodel.get_ensemble_idx_info(
        )
        # Number of Q-function samples per target; 1 when no bayesian config is set.
        q_sample_n = self.bayesian_config[
            "eval_sample_count"] if self.bayesian_config is not False else 1
        # Broadcast the first rewards across the transition and reward sample axes.
        first_rewards = tf.tile(
            tf.expand_dims(tf.expand_dims(first_rewards, 1), 1),
            [1, transition_sample_n, reward_sample_n])
        first_rewards.set_shape([None, transition_sample_n, reward_sample_n])
        if model_ensembling:
            # Give every transition sample its own copy of the starting state.
            obs = tf.tile(tf.expand_dims(obs, 1), [1, transition_sample_n, 1])
            obs.set_shape([None, transition_sample_n, self.obs_dim])
            first_done = tf.tile(tf.expand_dims(first_done, 1),
                                 [1, transition_sample_n])
            first_done.set_shape([None, transition_sample_n])

        ### below, we use a while loop to actually do the iterative model rollout
        extra_info = worldmodel.init_extra_info(obs)

        # One fixed-size TensorArray per quantity recorded at each rollout step.
        action_ta = tf.TensorArray(size=rollout_len,
                                   dynamic_size=False,
                                   dtype=tf.float32)
        obs_ta = tf.TensorArray(size=rollout_len,
                                dynamic_size=False,
                                dtype=tf.float32)
        done_ta = tf.TensorArray(size=rollout_len,
                                 dynamic_size=False,
                                 dtype=tf.float32)
        extra_info_ta = tf.TensorArray(size=rollout_len,
                                       dynamic_size=False,
                                       dtype=tf.float32)

        def rollout_loop_body(r_i, xxx_todo_changeme):
            # `xxx_todo_changeme` is the packed loop state; it is unpacked
            # immediately below.
            (obs, done, extra_info, action_ta, obs_ta, dones_ta,
             extra_info_ta) = xxx_todo_changeme
            # The policy sees the observation with gradients stopped.
            # `action_pretanh` is not used in this method.
            action_pretanh, action = self.build_evalution_graph(
                tf.stop_gradient(obs), get_full_info=True)

            if model_ensembling:
                next_obs, next_dones, next_extra_info = worldmodel.transition(
                    obs, action, extra_info, ensemble_idxs=ensemble_idxs)
            else:
                next_obs, next_dones, next_extra_info = worldmodel.transition(
                    obs, action, extra_info)
                # Collapse the transition outputs to their mean (presumably
                # over the model-sample axis -- TODO confirm).
                next_obs = tf.reduce_mean(next_obs, -2)
                next_dones = tf.reduce_mean(next_dones, -1)

            # Record the state this step started from and the action taken.
            action_ta = action_ta.write(r_i, action)
            obs_ta = obs_ta.write(r_i, obs)
            dones_ta = dones_ta.write(r_i, done)
            extra_info_ta = extra_info_ta.write(r_i, extra_info)
            return r_i + 1, (next_obs, next_dones, next_extra_info, action_ta,
                             obs_ta, dones_ta, extra_info_ta)

        # After the loop, final_obs / final_done hold the state reached by the
        # last transition; final_extra_info is not used below.
        _, (final_obs, final_done, final_extra_info, action_ta, obs_ta,
            done_ta, extra_info_ta) = tf.while_loop(
                lambda r_i, _: r_i < rollout_len, rollout_loop_body, [
                    0,
                    (obs, first_done, extra_info, action_ta, obs_ta, done_ta,
                     extra_info_ta)
                ])

        # Action the policy would take in the final frame (needed for the
        # bootstrapped Q value there). `final_action_pretanh` is unused.
        final_action_pretanh, final_action = self.build_evalution_graph(
            tf.stop_gradient(final_obs), get_full_info=True)

        ### compile the TensorArrays into useful tensors
        # stack() puts the rollout step first; reshape and transpose so the
        # batch axis leads, then append the final frame along the step axis.
        obss = obs_ta.stack()
        obss = tf.reshape(
            obss,
            tf.stack([rollout_len, -1, transition_sample_n, self.obs_dim]))
        obss = tf.transpose(obss, [1, 0, 2, 3])
        final_obs = tf.reshape(
            final_obs, tf.stack([-1, 1, transition_sample_n, self.obs_dim]))
        all_obss = tf.concat([obss, final_obs], 1)
        next_obss = all_obss[:, 1:]

        dones = done_ta.stack()
        dones = tf.reshape(dones,
                           tf.stack([rollout_len, -1, transition_sample_n]))
        dones = tf.transpose(dones, [1, 0, 2])
        final_done = tf.reshape(final_done,
                                tf.stack([-1, 1, transition_sample_n]))
        all_dones = tf.concat([dones, final_done], 1)

        actions = action_ta.stack()
        actions = tf.reshape(
            actions,
            tf.stack([rollout_len, -1, transition_sample_n, self.action_dim]))
        actions = tf.transpose(actions, [1, 0, 2, 3])
        final_action = tf.reshape(
            final_action,
            tf.stack([-1, 1, transition_sample_n, self.action_dim]))
        all_actions = tf.concat([actions, final_action], 1)

        # Running product of (1 - done) along the frame axis: the probability
        # that the rollout has not terminated up to and including each frame.
        continue_probs = tf.cumprod(1. - all_dones, axis=1)
        rewards = worldmodel.get_rewards(obss, actions, next_obss)
        # Prepend the supplied first rewards as frame 0. `rawrew` is only
        # referenced by the commented-out debug prints at the end.
        rawrew = rewards = tf.concat(
            [tf.expand_dims(first_rewards, 1), rewards], 1)

        ### TDK trick means we have to guess at every timestep
        if self.value_expansion["tdk_trick"]:
            guess_info = tf.concat([obss, actions], -1)
            Q_guesses = self.Q(guess_info, reduce_mode="random")
            Q_guesses = tf.reduce_mean(
                Q_guesses, -1
            )  # make it so there's only one guess per rollout length, which is the mean of the guesses under all the various model rollouts
            reached_this_point_to_guess_prob = tf.reduce_mean(
                continue_probs, -1)
        else:
            Q_guesses = None
            reached_this_point_to_guess_prob = None

        ### use the Q function at every timestep to get value estimates
        target_info = tf.concat([all_obss, all_actions], -1)
        Q_targets = self.old_Q(target_info, reduce_mode="none")

        rollout_frames = rollout_len + 1  # if we take N steps, we have N+1 frames

        ### create "decay-exponent matrix" of size [1,ROLLOUT_FRAMES,ROLLOUT_FRAMES,1]. the first ROLLOUT_FRAMES corresponds to the index of the source, the second to the target.
        # ts_count_mat[i, j] = j - i, the number of steps from source frame i
        # to target frame j.
        ts_count_mat = (tf.cast(
            tf.reshape(tf.range(rollout_frames), [1, rollout_frames]) -
            tf.reshape(tf.range(rollout_frames), [rollout_frames, 1]),
            tf.float32))
        # matrix_band_part(..., 0, -1) keeps only the upper triangle, i.e.
        # targets at or after the source. Values get one extra discount step
        # relative to rewards.
        reward_coeff_matrix = tf.matrix_band_part(
            tf.ones([rollout_frames, rollout_frames]), 0,
            -1) * self.discount**ts_count_mat
        value_coeff_matrix = tf.matrix_band_part(
            tf.ones([rollout_frames, rollout_frames]), 0,
            -1) * self.discount**(1. + ts_count_mat)
        reward_coeff_matrix = tf.reshape(
            reward_coeff_matrix, [1, rollout_frames, rollout_frames, 1, 1])
        value_coeff_matrix = tf.reshape(
            value_coeff_matrix, [1, rollout_frames, rollout_frames, 1, 1])

        ### similarly, create a "done" matrix
        # Shift the continue probabilities one frame later, with 1 at frame 0.
        shifted_continue_probs = tf.concat([
            tf.expand_dims(tf.ones_like(continue_probs[:, 0]), 1),
            continue_probs[:, :-1]
        ], 1)
        # Entry [source, target] is the target-frame probability divided by
        # the (shifted) source-frame probability; the 1e-8 keeps the
        # denominator away from zero.
        reward_continue_matrix = tf.expand_dims(
            shifted_continue_probs, 1) / tf.expand_dims(
                shifted_continue_probs + 1e-8, 2)
        value_continue_matrix = tf.expand_dims(
            continue_probs, 1) / tf.expand_dims(shifted_continue_probs + 1e-8,
                                                2)
        reward_continue_matrix = tf.expand_dims(reward_continue_matrix, -1)
        value_continue_matrix = tf.expand_dims(value_continue_matrix, -1)

        ### apply the discounting factors to the rewards and values
        rewards = tf.expand_dims(
            rewards, 1) * reward_coeff_matrix * reward_continue_matrix
        # Accumulate along the target axis so entry [source, target] is the
        # weighted reward sum from the source frame through the target frame.
        rewards = tf.cumsum(rewards, axis=2)
        values = tf.expand_dims(Q_targets,
                                1) * value_coeff_matrix * value_continue_matrix

        ### compute the targets using the Bellman equation
        # The extra axes make the reward samples and the Q samples broadcast
        # against each other, giving every combination of the two.
        sampled_targets = tf.expand_dims(
            rewards, -2) * self.reward_scale + tf.expand_dims(values, -1)

        ### flatten out the various sources of variance (transition, reward, and Q-function ensembles) to get a set of estimates for each candidate target
        sampled_targets = tf.reshape(
            sampled_targets,
            tf.stack([
                -1, rollout_frames, rollout_frames,
                transition_sample_n * reward_sample_n * q_sample_n
            ]))

        ### compute the mean and variance for each candidate target
        target_means, target_variances = tf.nn.moments(sampled_targets, 3)

        ### compute the confidence, either using the full covariance matrix, or approximating all the estimators as independent
        if self.value_expansion["covariances"]:
            targetdiffs = sampled_targets - tf.expand_dims(target_means, 3)
            # Per (batch, source) sum of products of deviations between every
            # pair of target frames (not divided by the sample count).
            target_covariances = tf.einsum(
                "abij,abjk->abik", targetdiffs,
                tf.transpose(targetdiffs, [0, 1, 3, 2]))
            # Solve (covariance + 1e-3 * I) w = 1 for the weights w; the
            # diagonal term (band part 0, 0) regularises the system.
            target_confidence = tf.squeeze(
                tf.matrix_solve(
                    target_covariances + tf.expand_dims(
                        tf.expand_dims(
                            tf.matrix_band_part(
                                tf.ones(tf.shape(target_covariances)[-2:]), 0,
                                0) * 1e-3, 0), 0),
                    tf.ones(
                        tf.concat([
                            tf.shape(target_covariances)[:-1],
                            tf.constant([1])
                        ], 0))), -1)
        else:
            # Independent-estimator approximation: inverse-variance weights.
            target_confidence = 1. / (target_variances + 1e-8)

        ### normalize so weights sum to 1
        # Zero the weights of targets that precede the source frame first.
        target_confidence *= tf.matrix_band_part(
            tf.ones([1, rollout_frames, rollout_frames]), 0, -1)
        target_confidence = target_confidence / tf.reduce_sum(
            target_confidence, axis=2, keepdims=True)

        ### below here is a bunch of debugging Print statements that I use as a sanity check:
        # target_confidence = tf.Print(target_confidence, [], message="raw rewards")
        # target_confidence = tf.Print(target_confidence, [rawrew[0,:,0,0]], summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [], message="\n", summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [(1. - all_dones)[0,:,0]], message="contin", summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [continue_probs[0,:,0]], message="cum_contin", summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [shifted_continue_probs[0,:,0]], message="shifted contin", summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [], message="reward_coeff")
        # for i in range(rollout_len+1): target_means = tf.Print(target_means, [reward_coeff_matrix[0,i,:,0,0]], summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [], message="reward_continue")
        # for i in range(rollout_len+1): target_means = tf.Print(target_means, [reward_continue_matrix[0,i,:,0,0]], summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [], message="value_coeff")
        # for i in range(rollout_len+1): target_means = tf.Print(target_means, [value_coeff_matrix[0,i,:,0,0]], summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [], message="value_continue")
        # for i in range(rollout_len+1): target_means = tf.Print(target_means, [value_continue_matrix[0,i,:,0,0]], summarize=rollout_len+1)
        # target_confidence = tf.Print(target_confidence, [], message="rewards")
        # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [rewards[0,i,:,0,0]], summarize=rollout_len+1)
        # target_confidence = tf.Print(target_confidence, [], message="target Qs")
        # target_confidence = tf.Print(target_confidence, [Q_targets[0,:,0,0]], summarize=rollout_len+1)
        # target_confidence = tf.Print(target_confidence, [], message="values")
        # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [values[0,i,:,0,0]], summarize=rollout_len+1)
        # target_confidence = tf.Print(target_confidence, [], message="target_means")
        # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [target_means[0,i,:]], summarize=rollout_len+1)
        # target_confidence = tf.Print(target_confidence, [], message="target_variance")
        # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [target_variances[0,i,:]], summarize=rollout_len+1)
        # target_confidence = tf.Print(target_confidence, [], message="target_confidence")
        # for i in range(rollout_len+1): target_confidence = tf.Print(target_confidence, [target_confidence[0,i,:]], summarize=rollout_len+1)
        # target_means = tf.Print(target_means, [target_confidence, action_lls, tf.shape(Q_targets)], message="\n\n", summarize=10)

        return target_means, target_confidence, Q_guesses, reached_this_point_to_guess_prob
Exemple #16
0
def discounted_reduce_sum(X, discount, axis=-1):
    """Sum `X` along `axis` after applying geometric discount weights.

    The weights are the inclusive cumulative product of `discount` along
    `axis`, so the k-th element (0-based) is scaled by `discount ** (k + 1)`;
    note that the first element is therefore already discounted once. When
    `discount` equals 1.0 no weight tensor is built and every element is
    scaled by the constant 1.0.

    Args:
        X: tensor to reduce.
        discount: per-step discount factor.
        axis: axis along which to discount and sum.

    Returns:
        The discounted sum of `X` with `axis` reduced away.
    """
    weights = 1.0
    if discount != 1.0:
        per_step = discount * tf.ones_like(X)
        weights = tf.cumprod(per_step, axis=axis)
    weighted = X * weights
    return tf.reduce_sum(weighted, axis=axis)