Code example #1
 def sample(self, time, outputs, state):
     del state
     # Return -1s where we did not sample, and sample_ids elsewhere
     select_sample = bernoulli_sample(probs=self.sampling_probability,
                                      dtype=dtypes.bool,
                                      sample_shape=self.batch_size,
                                      seed=self.scheduling_seed)
     return array_ops.where(
         select_sample, categorical_sample(logits=outputs, seed=self.seed),
         gen_array_ops.fill([self.batch_size], -1))
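The `bernoulli_sample` and `categorical_sample` helpers in this example are project-local wrappers around TensorFlow's distribution sampling. A minimal sketch of the same -1-sentinel pattern using only the public TF 2.x API (the names below are my assumption, not the project's code):

import tensorflow as tf

batch_size = 4
sampling_probability = 0.5
logits = tf.random.normal([batch_size, 10])                      # decoder outputs
select = tf.random.uniform([batch_size]) < sampling_probability  # Bernoulli mask
sampled_ids = tf.cast(tf.random.categorical(logits, 1)[:, 0], tf.int32)
# -1 where we did not sample, sampled ids elsewhere
sample_ids = tf.where(select, sampled_ids, tf.fill([batch_size], -1))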
Code example #2
File: array_ops.py Project: ChessWarrior/pred-rain
def alphas(shape, alpha_value, name=None):
    """Creates a tensor with all elements set to `alpha_value`.
    This operation returns a tensor of the same dtype as `alpha_value`, with
    shape `shape` and all elements set to `alpha_value`.

    Parameters
    ----------
    shape: A list of integers, a tuple of integers, or a 1-D `Tensor` of type `int32`.
        The shape of the desired tensor
    alpha_value: `float32`, `float64`, `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`
        The value used to fill the resulting `Tensor`.
    name: str
        A name for the operation (optional).

    Returns
    -------
    A `Tensor` with all elements set to `alpha_value`.

    Examples
    --------
    >>> tl.alphas([2, 3], 5)  # [[5, 5, 5], [5, 5, 5]]
    """

    with ops.name_scope(name, "alphas", [shape]) as name:

        alpha_tensor = convert_to_tensor(alpha_value)
        alpha_dtype = dtypes.as_dtype(alpha_tensor.dtype).base_dtype

        if not isinstance(shape, ops.Tensor):
            try:
                shape = constant_op._tensor_shape_tensor_conversion_function(
                    tensor_shape.TensorShape(shape))
            except (TypeError, ValueError):
                shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)

        if not shape._shape_tuple():
            shape = reshape(shape, [-1])  # Ensure it's a vector

        try:
            output = constant(alpha_value,
                              shape=shape,
                              dtype=alpha_dtype,
                              name=name)

        except (TypeError, ValueError):
            output = fill(shape,
                          constant(alpha_value, dtype=alpha_dtype),
                          name=name)

        if output.dtype.base_dtype != alpha_dtype:
            raise AssertionError("Dtypes do not correspond: %s and %s" %
                                 (output.dtype.base_dtype, alpha_dtype))

        return output
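A hypothetical usage sketch (assuming `alphas` is exposed as `tl.alphas`, as the docstring suggests):

import tensorlayer as tl

x = tl.alphas([2, 3], 9.5)
# x -> [[9.5, 9.5, 9.5], [9.5, 9.5, 9.5]], dtype float32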
Code example #3
File: helper.py Project: codemogroup/Interview-Bot
 def sample(self, time, outputs, state, name=None):
     with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample",
                         [time, outputs, state]):
         # Return -1s where we did not sample, and sample_ids elsewhere
         select_sampler = bernoulli.Bernoulli(
             probs=self._sampling_probability, dtype=dtypes.bool)
         select_sample = select_sampler.sample(sample_shape=self.batch_size,
                                               seed=self._scheduling_seed)
         sample_id_sampler = categorical.Categorical(logits=outputs)
         return array_ops.where(select_sample,
                                sample_id_sampler.sample(seed=self._seed),
                                gen_array_ops.fill([self.batch_size], -1))
Code example #4
File: helper.py Project: AnddyWang/tensorflow
 def sample(self, time, outputs, state, name=None):
   with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample",
                       [time, outputs, state]):
     # Return -1s where we did not sample, and sample_ids elsewhere
     select_sampler = bernoulli.Bernoulli(
         probs=self._sampling_probability, dtype=dtypes.bool)
     select_sample = select_sampler.sample(
         sample_shape=self.batch_size, seed=self._scheduling_seed)
     sample_id_sampler = categorical.Categorical(logits=outputs)
     return array_ops.where(
         select_sample,
         sample_id_sampler.sample(seed=self._seed),
         gen_array_ops.fill([self.batch_size], -1))
Code example #5
File: adagrad.py Project: Huoxubeiyin/tensorflow
 def _create_slots(self, var_list):
   for v in var_list:
     with ops.colocate_with(v):
       dtype = v.dtype.base_dtype
       if v.get_shape().is_fully_defined():
         init = init_ops.constant_initializer(self._initial_accumulator_value,
                                              dtype=dtype)
       else:
         # Use a Tensor instead of initializer if variable does not have static
         # shape.
         init_constant = gen_array_ops.fill(array_ops.shape(v),
                                            self._initial_accumulator_value)
         init = math_ops.cast(init_constant, dtype)
     self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                             "accumulator", self._name)
Code example #6
File: QAdagrad.py Project: regginalee/TensorQuant
 def _create_slots(self, var_list):
   for v in var_list:
     with ops.colocate_with(v):
       dtype = v.dtype.base_dtype
       if v.get_shape().is_fully_defined():
         init = init_ops.constant_initializer(self._initial_accumulator_value,
                                              dtype=dtype)
       else:
         # Use a Tensor instead of initializer if variable does not have static
         # shape.
         init_constant = gen_array_ops.fill(array_ops.shape(v),
                                            self._initial_accumulator_value)
         init = math_ops.cast(init_constant, dtype)
     self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                             "accumulator", self._name)
Code example #7
 def _create_vars(self, var_list, state):
   for v in var_list:
     # TODO(isaprykin): Delete colocate_with(v) from other optimizers and
     # confirm that colocation will happen anyway.
     dtype = v.dtype.base_dtype
     if v.get_shape().is_fully_defined():
       init = init_ops.constant_initializer(self._initial_accumulator_value,
                                            dtype=dtype)
     else:
       # Use a Tensor instead of initializer if variable does not have static
       # shape.
       init_constant = gen_array_ops.fill(
           array_ops.shape(v), self._initial_accumulator_value)
       init = math_ops.cast(init_constant, dtype)
     state.create_slot_with_initializer(v, init, v.get_shape(), dtype,
                                        "accumulator")
Code example #8
    def get_next_input(inp, out):
      next_input = inp.read(time)
      if self._prenet is not None:
        next_input = self._prenet(next_input)
        out = self._prenet(out)
      if self._sampling_prob > 0.:
        next_input = tf.stop_gradient(next_input)
        out = tf.stop_gradient(out)
        select_sampler = bernoulli.Bernoulli(
            probs=self._sampling_prob, dtype=dtypes.bool
        )
        select_sample = select_sampler.sample(
            sample_shape=(self.batch_size, 1), seed=self._seed
        )
        select_sample = tf.tile(select_sample, [1, self._last_dim])
        sample_ids = array_ops.where(
            select_sample, out,
            gen_array_ops.fill(
                [self.batch_size, self._last_dim],
                tf.cast(-20., self._dtype)
            )
        )
        where_sampling = math_ops.cast(
            array_ops.where(sample_ids > -20), dtypes.int32
        )
        where_not_sampling = math_ops.cast(
            array_ops.where(sample_ids <= -20), dtypes.int32
        )
        sample_ids_sampling = array_ops.gather_nd(sample_ids, where_sampling)
        inputs_not_sampling = array_ops.gather_nd(
            next_input, where_not_sampling
        )
        sampled_next_inputs = sample_ids_sampling
        base_shape = array_ops.shape(next_input)

        next_input = (
            array_ops.scatter_nd(
                indices=where_sampling,
                updates=sampled_next_inputs,
                shape=base_shape
            ) + array_ops.scatter_nd(
                indices=where_not_sampling,
                updates=inputs_not_sampling,
                shape=base_shape
            )
        )
      return next_input
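The gather_nd/scatter_nd dance at the end merges sampled outputs and teacher-forced inputs into one tensor of the base shape. A self-contained sketch of the pattern (public TF 2.x ops, my assumption):

import tensorflow as tf

a = tf.constant([[1., 1.], [2., 2.], [3., 3.]])   # e.g. sampled outputs
b = tf.constant([[9., 9.], [8., 8.], [7., 7.]])   # e.g. teacher-forced inputs
mask = tf.constant([True, False, True])           # where we sampled

where_a = tf.cast(tf.where(mask), tf.int32)       # indices taken from `a`
where_b = tf.cast(tf.where(~mask), tf.int32)      # indices taken from `b`
base_shape = tf.shape(a)
merged = (tf.scatter_nd(where_a, tf.gather_nd(a, where_a), base_shape)
          + tf.scatter_nd(where_b, tf.gather_nd(b, where_b), base_shape))
# merged == [[1., 1.], [8., 8.], [3., 3.]]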
Code example #9
File: layers.py Project: HalimHa/Kaggle
	def sample(self, time, outputs, state, name=None):
		with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample",
							[time, outputs, state]):
			# Return -1s where we did not sample, and sample_ids elsewhere
			select_sampler = bernoulli.Bernoulli(
				probs=self._sampling_probability, dtype=dtypes.bool)
			select_sample = select_sampler.sample(
				sample_shape=self.batch_size, seed=self._scheduling_seed)
			
# 			self.logs = tf.Print(select_sample, [select_sample])
# 			sample_id_sampler = categorical.Categorical(logits=outputs)
			sample_ids = math_ops.cast(math_ops.argmax(outputs, axis=-1), dtypes.int32)
# 			select_sample = tf.ones(shape=(self.batch_size,), dtype=dtypes.bool, name="test")
			return array_ops.where(
				select_sample,
				sample_ids,
				gen_array_ops.fill([self.batch_size], -1))
Code example #10
File: array_ops.py Project: zsdonghao/tensorlayer
def alphas(shape, alpha_value, name=None):
    """Creates a tensor with all elements set to `alpha_value`.
    This operation returns a tensor of the same dtype as `alpha_value`, with
    shape `shape` and all elements set to `alpha_value`.

    Parameters
    ----------
    shape: A list of integers, a tuple of integers, or a 1-D `Tensor` of type `int32`.
        The shape of the desired tensor
    alpha_value: `float32`, `float64`, `int8`, `uint8`, `int16`, `uint16`, `int32`, `int64`
        The value used to fill the resulting `Tensor`.
    name: str
        A name for the operation (optional).

    Returns
    -------
    A `Tensor` with all elements set to `alpha_value`.

    Examples
    --------
    >>> tl.alphas([2, 3], 5)  # [[5, 5, 5], [5, 5, 5]]
    """
    with ops.name_scope(name, "alphas", [shape]) as name:

        alpha_tensor = convert_to_tensor(alpha_value)
        alpha_dtype = dtypes.as_dtype(alpha_tensor.dtype).base_dtype

        if not isinstance(shape, ops.Tensor):
            try:
                shape = constant_op._tensor_shape_tensor_conversion_function(tensor_shape.TensorShape(shape))
            except (TypeError, ValueError):
                shape = ops.convert_to_tensor(shape, dtype=dtypes.int32)

        if not shape._shape_tuple():
            shape = reshape(shape, [-1])  # Ensure it's a vector

        try:
            output = constant(alpha_value, shape=shape, dtype=alpha_dtype, name=name)

        except (TypeError, ValueError):
            output = fill(shape, constant(alpha_value, dtype=alpha_dtype), name=name)

        if output.dtype.base_dtype != alpha_dtype:
            raise AssertionError("Dtypes do not correspond: %s and %s" % (output.dtype.base_dtype, alpha_dtype))

        return output
Code example #11
  def scheduled_sampling(self, batch_size, sampling_probability, true, estimate):
    with variable_scope.variable_scope("ScheduledEmbedding"):
      # Return -1s where we do not sample, and sample_ids elsewhere
      select_sampler = bernoulli.Bernoulli(probs=sampling_probability, dtype=tf.bool)
      select_sample = select_sampler.sample(sample_shape=batch_size)
      sample_ids = array_ops.where(
                  select_sample,
                  tf.range(batch_size),
                  gen_array_ops.fill([batch_size], -1))
      where_sampling = math_ops.cast(
          array_ops.where(sample_ids > -1), tf.int32)
      where_not_sampling = math_ops.cast(
          array_ops.where(sample_ids <= -1), tf.int32)
      _estimate = array_ops.gather_nd(estimate, where_sampling)
      _true = array_ops.gather_nd(true, where_not_sampling)

      base_shape = array_ops.shape(true)
      result1 = array_ops.scatter_nd(indices=where_sampling, updates=_estimate, shape=base_shape)
      result2 = array_ops.scatter_nd(indices=where_not_sampling, updates=_true, shape=base_shape)
      return result1 + result2
Code example #12
def gen_crossentropy(y_true, y_pred, q=0.7, k=-1.0):
    # Keep predictions ("y_pred") at positions where "y_true" equals 1
    y_ok = array_ops.boolean_mask(y_pred, gen_math_ops.equal(y_true, 1))
    # Convert to float64 for valid operations in TensorFlow
    um = np.float64(1.)
    q = np.float64(q)

    if k == -1:  # cross entropy loss
        # mean[ (1-y_ok^q)/q ]
        return K.mean(math_ops.divide(
            math_ops.subtract(um, math_ops.pow(y_ok, q)), q),
                      axis=-1)
    else:  # truncated cross entropy loss

        k = np.float64(k)
        # if y_ok < k
        #     [ (1-k^q)/q    ]  (no broadcasting in Where())
        #     [ (1-y_ok^q)/q ]
        vfunct = array_ops.where(
            gen_math_ops.less_equal(y_ok, k),
            gen_array_ops.fill(array_ops.shape(y_ok), (um - k**q) / q),
            math_ops.divide(math_ops.subtract(um, math_ops.pow(y_ok, q)), q))
        return K.mean(vfunct, axis=-1)  # mean [ above values ]
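For reference, the quantity being averaged appears to be the generalized (Lq) cross-entropy with optional truncation: Lq(p) = (1 - p**q) / q, clamped to the constant (1 - k**q) / q once p <= k. A tiny numpy sketch (illustrative only, not the project's code):

import numpy as np

def lq(p, q=0.7, k=-1.0):
    if k == -1:                                   # plain Lq loss
        return (1.0 - p**q) / q
    return np.where(p <= k,                       # truncated at threshold k
                    (1.0 - k**q) / q,
                    (1.0 - p**q) / q)

print(lq(np.array([0.1, 0.5, 0.9])))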
Code example #13
def scheduled_sampling_vocab_dist(hps, sampling_probability, output, embedding, inp, alpha = 0):
  # borrowed ideas from https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/ScheduledEmbeddingTrainingHelper

  def soft_argmax(alpha, output):
    #alpha_exp = tf.exp(alpha * output) # (batch_size, vocab_size)
    #one_hot_scores = alpha_exp / tf.reshape(tf.reduce_sum(alpha_exp, axis=1),[-1,1]) #(batch_size, vocab_size)
    one_hot_scores = tf.nn.softmax(alpha * output)
    return one_hot_scores

  def soft_top_k(alpha, output, K):
    copy = tf.identity(output)
    p = []
    arg_top_k = []
    for k in range(K):
      sargmax = soft_argmax(alpha, copy)
      copy = (1-sargmax)* copy
      p.append(tf.reduce_sum(sargmax * output, axis=1))
      arg_top_k.append(sargmax)

    return tf.stack(p, axis=1), tf.stack(arg_top_k)

  with variable_scope.variable_scope("ScheduledEmbedding"):
    # Return -1s where we did not sample, and sample_ids elsewhere
    select_sampler = bernoulli.Bernoulli(probs=sampling_probability, dtype=tf.bool)
    select_sample = select_sampler.sample(sample_shape=hps.batch_size)
    sample_id_sampler = categorical.Categorical(probs=output) # equivalent to argmax{ Multinomial(output, total_count=1) }, our greedy search selection
    sample_ids = array_ops.where(
            select_sample,
            sample_id_sampler.sample(seed=123),
            gen_array_ops.fill([hps.batch_size], -1))

    where_sampling = math_ops.cast(
        array_ops.where(sample_ids > -1), tf.int32)
    where_not_sampling = math_ops.cast(
        array_ops.where(sample_ids <= -1), tf.int32)

    if hps.greedy_scheduled_sampling:
      sample_ids = tf.argmax(output, axis=1, output_type=tf.int32)

    sample_ids_sampling = array_ops.gather_nd(sample_ids, where_sampling)
    inputs_not_sampling = array_ops.gather_nd(inp, where_not_sampling)

    if hps.E2EBackProp:
      if hps.hard_argmax:
        greedy_search_prob, greedy_search_sample = tf.nn.top_k(output, k=hps.k) # (batch_size, k)
        greedy_search_prob_normalized = greedy_search_prob/tf.reshape(tf.reduce_sum(greedy_search_prob,axis=1),[-1,1])
        greedy_embedding = tf.nn.embedding_lookup(embedding, greedy_search_sample)
        normalized_embedding = tf.multiply(tf.reshape(greedy_search_prob_normalized,[hps.batch_size,hps.k,1]), greedy_embedding)
        e2e_embedding = tf.reduce_mean(normalized_embedding,axis=1)
      else:
        e = []
        greedy_search_prob, greedy_search_sample = soft_top_k(alpha, output,
                                                              K=hps.k)  # (batch_size, k), (k, batch_size, vocab_size)
        greedy_search_prob_normalized = greedy_search_prob / tf.reshape(tf.reduce_sum(greedy_search_prob, axis=1),
                                                                        [-1, 1])

        for _ in range(hps.k):
          a_k = greedy_search_sample[_]
          e_k = tf.matmul(tf.reshape(greedy_search_prob_normalized[:,_],[-1,1]) * a_k, embedding)
          e.append(e_k)
        e2e_embedding = tf.reduce_sum(e, axis=0) # (batch_size, emb_dim)
      sampled_next_inputs = array_ops.gather_nd(e2e_embedding, where_sampling)
    else:
      if hps.hard_argmax:
        sampled_next_inputs = tf.nn.embedding_lookup(embedding, sample_ids_sampling)
      else: # using soft argmax (greedy) proposed in: https://arxiv.org/abs/1704.06970
        #alpha_exp = tf.exp(alpha * (output_not_extended + G)) # (batch_size, vocab_size)
        #one_hot_scores = alpha_exp / tf.reduce_sum(alpha_exp, axis=1) #(batch_size, vocab_size)
        one_hot_scores = soft_argmax(alpha, output) #(batch_size, vocab_size)
        soft_argmax_embedding = tf.matmul(one_hot_scores, embedding) #(batch_size, emb_size)
        sampled_next_inputs = array_ops.gather_nd(soft_argmax_embedding, where_sampling)

    base_shape = array_ops.shape(inp)
    result1 = array_ops.scatter_nd(indices=where_sampling, updates=sampled_next_inputs, shape=base_shape)
    result2 = array_ops.scatter_nd(indices=where_not_sampling, updates=inputs_not_sampling, shape=base_shape)
    return result1 + result2
Code example #14
def scheduled_sampling(hps,
                       sampling_probability,
                       output,
                       embedding,
                       inp,
                       alpha=0):
    """No teacher forcing, sampling decoder input for current step, either right word or generated word

    Args:
      hps: model hyperparameters
      sampling_probability: probability of sampling for the current step
      output: decoder output for the previous step, (Batch_size, extended_vsize)
      embedding: model embedding, (vocab_size, embed_dim)
      inp: decoder inputs for current step, (Batch_size, embed_dim)
      alpha: soft argmax argument
    """
    # borrowed ideas from https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/ScheduledEmbeddingTrainingHelper
    vocab_size = embedding.get_shape()[0]

    def soft_argmax(alpha, _output):
        """Soft argmax is derivative, transform argmax into a soft vector, computed orignal vocab dist and normalize it
        Args:
          alpha: soft argmax argument
          _output: decoder output for every step, (Batch_size, extended_vsize)
        Returns:
          (Batch_size, vocab_size)
        """

        # new_oov_scores, (Batch_size, 1)
        new_oov_scores = tf.reshape(
            _output[:, 0] + tf.reduce_sum(_output[:, vocab_size:], axis=1),
            [-1, 1])  # add score for all OOV to the UNK score

        # _output, (Batch_size, vocab_size)
        _output = tf.concat([new_oov_scores, _output[:, 1:vocab_size]],
                            axis=1)  # select only the vocab_size outputs
        _output = _output / tf.reshape(tf.reduce_sum(output, axis=1),
                                       [-1, 1])  # re-normalize scores

        # alpha_exp = tf.exp(alpha * _output) # (batch_size, vocab_size)
        # one_hot_scores = alpha_exp / tf.reshape(tf.reduce_sum(alpha_exp, axis=1),[-1,1]) #(batch_size, vocab_size)
        one_hot_scores = tf.nn.softmax((alpha * _output))
        return one_hot_scores

    def soft_top_k(alpha, _output, K):
        """
        Args:
          alpha: same as soft argmax
          _output: decoder output distribution, (Batch_size, extended_vsize)
        Returns:
          (Batch_size, K), (K, Batch_size, vocab_size)
        """
        copy = tf.identity(_output)
        p = []
        arg_top_k = []
        for k in range(K):
            sargmax = soft_argmax(alpha, copy)
            copy = (1 - sargmax) * copy
            p.append(tf.reduce_sum(sargmax * _output, axis=1))
            arg_top_k.append(sargmax)

        return tf.stack(p, axis=1), tf.stack(arg_top_k)

    with variable_scope.variable_scope("ScheduledEmbedding"):
        # Return -1s where we did not sample, and sample_ids elsewhere

        # decide whether sampling and which word to sample for every data in current batch
        select_sampler = bernoulli.Bernoulli(probs=sampling_probability,
                                             dtype=tf.bool)
        select_sample = select_sampler.sample(sample_shape=hps.batch_size)
        sample_id_sampler = categorical.Categorical(
            probs=output
        )  # equivalent to argmax{ Multinomial(output, total_count=1) }, our greedy search selection
        sample_ids = array_ops.where(select_sample,
                                     sample_id_sampler.sample(seed=123),
                                     gen_array_ops.fill([hps.batch_size], -1))

        # sample_ids, (Batch_size)
        where_sampling = math_ops.cast(array_ops.where(sample_ids > -1),
                                       tf.int32)
        where_not_sampling = math_ops.cast(array_ops.where(sample_ids <= -1),
                                           tf.int32)

        if hps.greedy_scheduled_sampling:
            # sample_ids, (batch_size,)
            sample_ids = tf.argmax(output, axis=1, output_type=tf.int32)

        sample_ids_sampling = array_ops.gather_nd(sample_ids, where_sampling)

        cond = tf.less(sample_ids_sampling, vocab_size)  # replace oov with unk
        sample_ids_sampling = tf.cast(cond, tf.int32) * sample_ids_sampling
        inputs_not_sampling = array_ops.gather_nd(inp, where_not_sampling)

        if hps.E2EBackProp:
            if hps.hard_argmax:
                greedy_search_prob, greedy_search_sample = tf.nn.top_k(
                    output, k=hps.k)  # (batch_size, k)
                greedy_search_prob_normalized = greedy_search_prob / tf.reshape(
                    tf.reduce_sum(greedy_search_prob, axis=1), [-1, 1])

                cond = tf.less(greedy_search_sample,
                               vocab_size)  # replace oov with unk
                greedy_search_sample = tf.cast(cond,
                                               tf.int32) * greedy_search_sample

                greedy_embedding = tf.nn.embedding_lookup(
                    embedding, greedy_search_sample)
                normalized_embedding = tf.multiply(
                    tf.reshape(greedy_search_prob_normalized,
                               [hps.batch_size, hps.k, 1]), greedy_embedding)
                e2e_embedding = tf.reduce_mean(normalized_embedding, axis=1)
            else:
                e = []
                greedy_search_prob, greedy_search_sample = soft_top_k(
                    alpha, output,
                    K=hps.k)  # (batch_size, k), (k, batch_size, vocab_size)
                greedy_search_prob_normalized = greedy_search_prob / tf.reshape(
                    tf.reduce_sum(greedy_search_prob, axis=1), [-1, 1])

                for _ in range(hps.k):
                    a_k = greedy_search_sample[_]
                    e_k = tf.matmul(
                        tf.reshape(greedy_search_prob_normalized[:, _],
                                   [-1, 1]) * a_k, embedding)
                    e.append(e_k)
                e2e_embedding = tf.reduce_sum(e,
                                              axis=0)  # (batch_size, emb_dim)
            sampled_next_inputs = array_ops.gather_nd(e2e_embedding,
                                                      where_sampling)
        else:
            if hps.hard_argmax:
                sampled_next_inputs = tf.nn.embedding_lookup(
                    embedding, sample_ids_sampling)
            else:  # using soft argmax (greedy) proposed in: https://arxiv.org/abs/1704.06970
                # alpha_exp = tf.exp(alpha * (output_not_extended + G)) # (batch_size, vocab_size)
                # one_hot_scores = alpha_exp / tf.reduce_sum(alpha_exp, axis=1) #(batch_size, vocab_size)
                one_hot_scores = soft_argmax(
                    alpha, output)  # (batch_size, vocab_size)
                soft_argmax_embedding = tf.matmul(
                    one_hot_scores, embedding)  # (batch_size, emb_size)
                sampled_next_inputs = array_ops.gather_nd(
                    soft_argmax_embedding, where_sampling)

        base_shape = array_ops.shape(inp)
        result1 = array_ops.scatter_nd(indices=where_sampling,
                                       updates=sampled_next_inputs,
                                       shape=base_shape)
        result2 = array_ops.scatter_nd(indices=where_not_sampling,
                                       updates=inputs_not_sampling,
                                       shape=base_shape)
        return result1 + result2
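The OOV handling above maps any sampled id >= vocab_size to 0 by multiplying with a boolean mask, i.e. out-of-vocabulary words fall back to UNK (assumption: UNK has id 0). A tiny sketch:

import tensorflow as tf

vocab_size = 5
ids = tf.constant([1, 7, 3])                 # 7 is out-of-vocabulary
in_vocab = tf.less(ids, vocab_size)
ids = tf.cast(in_vocab, tf.int32) * ids      # -> [1, 0, 3]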
Code example #15
def scheduled_sampling(hps, sampling_probability, output, embedding, inp):
    vocab_size = embedding.get_shape()[0].value
    with variable_scope.variable_scope("ScheduleEmbedding"):
        select_sampler = bernoulli.Bernoulli(probs=sampling_probability, dtype=tf.bool)
        select_sample = select_sampler.sample(sample_shape=hps.batch_size)
        sample_id_sampler = categorical.Categorical(probs=output)
        sample_ids = array_ops.where(select_sample, sample_id_sampler.sample(seed=123), gen_array_ops.fill([hps.batch_size],-1))
        where_sampling = math_ops.cast(array_ops.where(sample_ids > -1), tf.int32)
        where_not_sampling = math_ops.cast(array_ops.where(sample_ids <= -1), tf.int32)
        sample_ids_sampling = array_ops.gather_nd(sample_ids, where_sampling)
        cond = tf.less(sample_ids_sampling, vocab_size)
        sample_ids_sampling = tf.cast(cond, tf.int32) * sample_ids_sampling
        inputs_not_sampling = array_ops.gather_nd(inp, where_not_sampling)
        sampling_next_inputs = tf.nn.embedding_lookup(embedding, sample_ids_sampling)
        result1 = array_ops.scatter_nd(indices=where_sampling, updates=sampling_next_inputs, shape=array_ops.shape(inp))
        result2 = array_ops.scatter_nd(indices=where_not_sampling, updates=inputs_not_sampling, shape=array_ops.shape(inp))
        return result1 + result2
Code example #16
File: adagrad.py Project: Harryi0/tinyML
 def init():
     # Use a Tensor instead of initializer if variable does not have
     # static shape.
     init_constant = gen_array_ops.fill(array_ops.shape(v),
                                        self._initial_accumulator_value)
     return math_ops.cast(init_constant, dtype)
Code example #17
File: adagrad.py Project: Ajaycs99/tensorflow
 def init(v=v, dtype=dtype):
   # Use a Tensor instead of initializer if variable does not have
   # static shape.
   init_constant = gen_array_ops.fill(
       array_ops.shape(v), self._initial_accumulator_value)
   return math_ops.cast(init_constant, dtype)
Code example #18
def scheduled_sampling(hps,
                       sampling_probability,
                       output,
                       embedding,
                       inp,
                       alpha=0):
    # borrowed ideas from https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/ScheduledEmbeddingTrainingHelper
    vocab_size = embedding.get_shape()[0].value

    def soft_argmax(alpha, output):
        alpha_exp = tf.exp(alpha * output)  # (batch_size, vocab_size)
        one_hot_scores = alpha_exp / tf.reshape(
            tf.reduce_sum(alpha_exp, axis=1),
            [-1, 1])  #(batch_size, vocab_size)
        return one_hot_scores

    def soft_top_k(alpha, output, K):
        copy = tf.identity(output)
        p = []
        arg_top_k = []
        for k in range(K):
            sargmax = soft_argmax(alpha, copy)
            copy = (1 - sargmax) * copy
            p.append(tf.reduce_sum(sargmax * output, axis=1))
            # replace oov with unk if necessary
            mask = tf.equal(tf.reduce_max(sargmax, axis=1),
                            tf.reduce_max(sargmax[:, 0:vocab_size], axis=1))
            sargmax_truncated = tf.where(
                mask, sargmax[:, 0:vocab_size],
                tf.stack([
                    tf.one_hot(0, vocab_size) for _ in range(hps.batch_size)
                ]))

            arg_top_k.append(sargmax_truncated)

        return tf.stack(p, axis=1), tf.stack(arg_top_k)

    with variable_scope.variable_scope("ScheduledEmbedding"):
        # Return -1s where we did not sample, and sample_ids elsewhere
        select_sampler = bernoulli.Bernoulli(probs=sampling_probability,
                                             dtype=tf.bool)
        select_sample = select_sampler.sample(sample_shape=hps.batch_size)
        sample_id_sampler = categorical.Categorical(
            probs=output
        )  # equivalent to argmax{ Multinomial(output, total_count=1) }, our greedy search selection
        sample_ids = array_ops.where(select_sample,
                                     sample_id_sampler.sample(seed=123),
                                     gen_array_ops.fill([hps.batch_size], -1))

        where_sampling = math_ops.cast(array_ops.where(sample_ids > -1),
                                       tf.int32)
        where_not_sampling = math_ops.cast(array_ops.where(sample_ids <= -1),
                                           tf.int32)

        if hps.greedy_scheduled_sampling:
            sample_ids = tf.argmax(output, axis=1, output_type=tf.int32)

        sample_ids_sampling = array_ops.gather_nd(sample_ids, where_sampling)

        cond = tf.less(sample_ids_sampling, vocab_size)  # replace oov with unk
        sample_ids_sampling = tf.cast(cond, tf.int32) * sample_ids_sampling
        inputs_not_sampling = array_ops.gather_nd(inp, where_not_sampling)

        if hps.E2EBackProp:
            if hps.hard_argmax:
                greedy_search_prob, greedy_search_sample = tf.nn.top_k(
                    output, k=hps.k)  # (batch_size, k)
                greedy_search_prob_normalized = greedy_search_prob / tf.reshape(
                    tf.reduce_sum(greedy_search_prob, axis=1), [-1, 1])

                cond = tf.less(greedy_search_sample,
                               vocab_size)  # replace oov with unk
                greedy_search_sample = tf.cast(cond,
                                               tf.int32) * greedy_search_sample

                greedy_embedding = tf.nn.embedding_lookup(
                    embedding, greedy_search_sample)
                normalized_embedding = tf.multiply(
                    tf.reshape(greedy_search_prob_normalized,
                               [hps.batch_size, hps.k, 1]), greedy_embedding)
                e2e_embedding = tf.reduce_sum(normalized_embedding, axis=1)
            else:
                e = []
                greedy_search_prob, greedy_search_sample = soft_top_k(
                    alpha, output, K=hps.k)  # (batch_size, k), (k, batch_size, vocab_size)
                greedy_search_prob_normalized = greedy_search_prob / tf.reshape(
                    tf.reduce_sum(greedy_search_prob, axis=1), [-1, 1])

                for _ in range(hps.k):
                    a_k = greedy_search_sample[_]
                    e_k = tf.matmul(
                        tf.reshape(greedy_search_prob_normalized[:, _],
                                   [-1, 1]) * a_k, embedding)
                    e.append(e_k)
                e2e_embedding = tf.reduce_sum(e,
                                              axis=0)  # (batch_size, emb_dim)
            sampled_next_inputs = array_ops.gather_nd(e2e_embedding,
                                                      where_sampling)
        else:
            if hps.hard_argmax:
                sampled_next_inputs = tf.nn.embedding_lookup(
                    embedding, sample_ids_sampling)
            else:  # using soft argmax (greedy) proposed in: https://arxiv.org/abs/1704.06970
                if not hps.greedy_scheduled_sampling:
                    # Gumbel reparametrization trick: https://arxiv.org/abs/1704.06970
                    U = tf.random_uniform(
                        (hps.batch_size, vocab_size), 10e-12,
                        (1 - 10e-12))  # add a small number to avoid log(0)
                    G = -tf.log(-tf.log(U))
                else:
                    G = tf.zeros((hps.batch_size, vocab_size))
                #alpha_exp = tf.exp(alpha * (output_not_extended + G)) # (batch_size, vocab_size)
                #one_hot_scores = alpha_exp / tf.reduce_sum(alpha_exp, axis=1) #(batch_size, vocab_size)
                one_hot_scores = soft_argmax(
                    alpha, (output + G))  #(batch_size, vocab_size)
                sampled_next_inputs = tf.matmul(
                    one_hot_scores, embedding)  #(batch_size, emb_size)

        base_shape = array_ops.shape(inp)
        result1 = array_ops.scatter_nd(indices=where_sampling,
                                       updates=sampled_next_inputs,
                                       shape=base_shape)
        result2 = array_ops.scatter_nd(indices=where_not_sampling,
                                       updates=inputs_not_sampling,
                                       shape=base_shape)
        return result1 + result2
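For context on the Gumbel reparametrization used above: perturbing log-probabilities with G = -log(-log(U)), U ~ Uniform(0, 1), makes argmax an exact sample from the categorical distribution, and softmax(alpha * (log p + G)) is its differentiable relaxation. A minimal sketch with public TF 2.x ops (note the example above adds G to the probabilities `output` directly rather than to log-probabilities):

import tensorflow as tf

batch_size, vocab_size, alpha = 4, 10, 100.0
probs = tf.nn.softmax(tf.random.normal([batch_size, vocab_size]))
U = tf.random.uniform([batch_size, vocab_size], 1e-12, 1.0 - 1e-12)  # avoid log(0)
G = -tf.math.log(-tf.math.log(U))                              # Gumbel(0, 1) noise
soft_one_hot = tf.nn.softmax(alpha * (tf.math.log(probs) + G)) # ~ one-hot sample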