Example 1
    def __init__(self,
                 inputs,
                 sequence_length,
                 embedding,
                 sampling_probability,
                 time_major=False,
                 seed=None,
                 scheduling_seed=None,
                 name=None):
        """Initializer.

        Args:
          inputs: A (structure of) input tensors.
          sequence_length: An int32 vector tensor.
          embedding: A callable or the `params` argument for `embedding_lookup`.
            If a callable, it can take a vector tensor of token `ids`,
            or take two arguments (`ids`, `times`), where `ids` is a vector
            tensor of token ids, and `times` is a vector tensor of current
            time steps (i.e., position ids). The latter case can be used when
            :attr:`embedding` is a combination of word embedding and position
            embedding.
          sampling_probability: A `float32` scalar or vector tensor: the
            probability of sampling categorically from the output ids instead
            of reading directly from the inputs.
          time_major: Python bool.  Whether the tensors in `inputs` are time major.
            If `False` (default), they are assumed to be batch major.
          seed: The sampling seed.
          scheduling_seed: The schedule decision rule sampling seed.
          name: Name scope for any created operations.

        Raises:
          ValueError: if `sampling_probability` is not a scalar or vector.
        """
        with ops.name_scope(name, "ScheduledEmbeddingSamplingWrapper",
                            [embedding, sampling_probability]):
            if callable(embedding):
                self._embedding_fn = embedding
            else:
                self._embedding_fn = (
                    lambda ids: embedding_ops.embedding_lookup(embedding, ids))

            self._embedding_args_cnt = len(get_args(self._embedding_fn))
            if self._embedding_args_cnt != 1 and self._embedding_args_cnt != 2:
                raise ValueError('`embedding` should expect 1 or 2 arguments.')

            self._sampling_probability = ops.convert_to_tensor(
                sampling_probability, name="sampling_probability")
            if self._sampling_probability.get_shape().ndims not in (0, 1):
                raise ValueError(
                    "sampling_probability must be either a scalar or a vector. "
                    "saw shape: %s" % (self._sampling_probability.get_shape()))
            self._seed = seed
            self._scheduling_seed = scheduling_seed
            super(ScheduledEmbeddingTrainingHelper,
                  self).__init__(inputs=inputs,
                                 sequence_length=sequence_length,
                                 time_major=time_major,
                                 name=name)
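As a usage sketch (the table names, sizes, and placeholders below are illustrative, not from the original code), the two-argument form of `embedding` described in the docstring can combine a word table with a position table and be passed to the `ScheduledEmbeddingTrainingHelper` shown above:

import tensorflow as tf

vocab_size, max_pos, dim = 10000, 512, 256   # illustrative sizes
word_table = tf.get_variable("word_embedding", [vocab_size, dim])
pos_table = tf.get_variable("position_embedding", [max_pos, dim])

def embedding_fn(ids, times):
    # Two-argument callable: word embedding plus position embedding.
    return (tf.nn.embedding_lookup(word_table, ids)
            + tf.nn.embedding_lookup(pos_table, times))

# Batch-major embedded decoder inputs and their lengths (placeholders).
decoder_inputs = tf.placeholder(tf.float32, [None, None, dim])
sequence_length = tf.placeholder(tf.int32, [None])

helper = ScheduledEmbeddingTrainingHelper(   # the class whose __init__ is shown above
    inputs=decoder_inputs,
    sequence_length=sequence_length,
    embedding=embedding_fn,
    sampling_probability=0.25)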
Example 2
def get_gradient_clip_fn(hparams=None):
    """Creates a gradient clipping function based on the hyperparameters.

    See the :attr:`gradient_clip` field in
    :meth:`~texar.tf.core.default_optimization_hparams` for all
    hyperparameters and default values.

    The gradient clipping function takes a list of `(gradients, variables)`
    tuples and returns a list of `(clipped_gradients, variables)` tuples.
    Typical examples include
    :tf_main:`tf.clip_by_global_norm <clip_by_global_norm>`,
    :tf_main:`tf.clip_by_value <clip_by_value>`,
    :tf_main:`tf.clip_by_norm <clip_by_norm>`,
    :tf_main:`tf.clip_by_average_norm <clip_by_average_norm>`, etc.

    Args:
        hparams (dict or HParams, optional): hyperparameters. Missing
            hyperparameters are set to default values automatically.

    Returns:
        function or `None`: If `hparams["type"]` is specified, returns
        the respective function. If `hparams["type"]` is empty,
        returns `None`.
    """
    if hparams is None or isinstance(hparams, dict):
        hparams = HParams(
            hparams, default_optimization_hparams()["gradient_clip"])
    fn_type = hparams["type"]
    if fn_type is None or fn_type == "":
        return None

    fn_modules = ["tensorflow", "texar.tf.custom"]
    clip_fn = utils.get_function(fn_type, fn_modules)
    clip_fn_args = utils.get_args(clip_fn)
    fn_kwargs = hparams["kwargs"]
    if isinstance(fn_kwargs, HParams):
        fn_kwargs = fn_kwargs.todict()

    def grad_clip_fn(grads_and_vars):
        """Gradient clipping function.

        Args:
            grads_and_vars (list): A list of `(gradients, variables)` tuples.

        Returns:
            list: A list of `(clipped_gradients, variables)` tuples.
        """
        grads, vars_ = zip(*grads_and_vars)
        if clip_fn == tf.clip_by_global_norm:
            clipped_grads, _ = clip_fn(t_list=grads, **fn_kwargs)
        elif 't_list' in clip_fn_args:
            clipped_grads = clip_fn(t_list=grads, **fn_kwargs)
        elif 't' in clip_fn_args:     # e.g., tf.clip_by_value
            clipped_grads = [clip_fn(t=grad, **fn_kwargs) for grad in grads]
        else:
            raise ValueError(
                "Unsupported gradient clipping function: %s" % fn_type)

        return list(zip(clipped_grads, vars_))

    return grad_clip_fn
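A brief usage sketch, assuming `get_gradient_clip_fn` above is importable; the hyperparameter values are illustrative. `"clip_by_global_norm"` is resolved against the `tensorflow` module, so `kwargs` holds the keyword arguments of `tf.clip_by_global_norm`:

import tensorflow as tf

hparams = {
    "type": "clip_by_global_norm",
    "kwargs": {"clip_norm": 5.0},
}
grad_clip_fn = get_gradient_clip_fn(hparams)   # the function defined above

# A toy loss so the snippet is self-contained.
w = tf.get_variable("w", shape=[10])
loss = tf.reduce_sum(tf.square(w))

optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
grads_and_vars = optimizer.compute_gradients(loss)
if grad_clip_fn is not None:
    grads_and_vars = grad_clip_fn(grads_and_vars)
train_op = optimizer.apply_gradients(grads_and_vars)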
Example 3
    def __init__(self,
                 embedding,
                 start_tokens,
                 end_token,
                 tau,
                 embedding_size=None,
                 stop_gradient=False,
                 use_finish=True):
        if callable(embedding):
            self._embedding_fn = embedding

            if embedding_size is None:
                raise ValueError('`embedding_size` must be provided if '
                                 '`embedding` is a callable.')
            self._embedding_size = tf.convert_to_tensor(embedding_size,
                                                        dtype=tf.int32,
                                                        name="embedding_size")
        else:
            self._embedding_fn = (
                lambda soft_ids: soft_embedding_lookup(embedding, soft_ids))
            self._embedding_size = tf.shape(embedding)[0]

        self._start_tokens = tf.convert_to_tensor(start_tokens,
                                                  dtype=tf.int32,
                                                  name="start_tokens")
        self._end_token = tf.convert_to_tensor(end_token,
                                               dtype=tf.int32,
                                               name="end_token")
        if self._start_tokens.get_shape().ndims != 1:
            raise ValueError("start_tokens must be a vector")
        self._batch_size = tf.size(self._start_tokens)
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        soft_start_tokens = tf.one_hot(self._start_tokens,
                                       self._embedding_size,
                                       dtype=tf.float32)
        self._embedding_args_cnt = len(utils.get_args(self._embedding_fn))
        if self._embedding_args_cnt == 1:
            self._start_inputs = self._embedding_fn(soft_ids=soft_start_tokens)
        elif self._embedding_args_cnt == 2:
            # Position index is 0 in the beginning
            times = tf.zeros([self._batch_size], dtype=tf.int32)
            self._start_inputs = self._embedding_fn(soft_ids=soft_start_tokens,
                                                    times=times)
        else:
            raise ValueError('`embedding` should expect 1 or 2 arguments.')

        self._tau = tau
        self._stop_gradient = stop_gradient
        self._use_finish = use_finish
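Since the helper calls the embedding function with the keyword `soft_ids`, a user-supplied callable should use that parameter name, and `embedding_size` must then be passed explicitly because it cannot be inferred from a callable. A minimal sketch (the table name and sizes are illustrative, and the matmul mirrors what `soft_embedding_lookup` is expected to do for 2-D soft ids):

import tensorflow as tf

vocab_size, dim = 10000, 256   # illustrative sizes
embedding_table = tf.get_variable("embedding", [vocab_size, dim])

def soft_embedding_fn(soft_ids):
    # One-argument callable: mixes embedding rows by the soft token
    # distribution, e.g. the one-hot start tokens built above.
    return tf.matmul(soft_ids, embedding_table)

Such a function would be supplied as `embedding`, together with `embedding_size=vocab_size`.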
Example 4
    def __init__(self, embedding, start_tokens, end_token):
        """Initializer.

        Args:
          embedding: A callable or the `params` argument for `embedding_lookup`.
            If a callable, it can take a vector tensor of `ids` (argmax ids),
            or take two arguments (`ids`, `times`), where `ids` is a vector
            tensor of argmax ids, and `times` is a vector tensor of current
            time steps (i.e., position ids). The latter case can be used when
            :attr:`embedding` is a combination of word embedding and position
            embedding.
            The returned tensor will be returned by :meth:`next_inputs`.
          start_tokens: `int32` vector shaped `[batch_size]`, the start tokens.
          end_token: `int32` scalar, the token that marks end of decoding.

        Raises:
          ValueError: if `start_tokens` is not a 1D tensor or `end_token` is not a
            scalar.
        """
        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = (
                lambda ids: embedding_ops.embedding_lookup(embedding, ids))

        self._start_tokens = ops.convert_to_tensor(start_tokens,
                                                   dtype=dtypes.int32,
                                                   name="start_tokens")
        self._end_token = ops.convert_to_tensor(end_token,
                                                dtype=dtypes.int32,
                                                name="end_token")
        if self._start_tokens.get_shape().ndims != 1:
            raise ValueError("start_tokens must be a vector")
        self._batch_size = shape_list(start_tokens)[0]
        if self._end_token.get_shape().ndims != 0:
            raise ValueError("end_token must be a scalar")

        self._embedding_args_cnt = len(get_args(self._embedding_fn))
        if self._embedding_args_cnt == 1:
            self._start_inputs = self._embedding_fn(self._start_tokens)
        elif self._embedding_args_cnt == 2:
            # Position index is 0 in the beginning
            times = tf.zeros([self._batch_size], dtype=tf.int32)
            self._start_inputs = self._embedding_fn(self._start_tokens, times)
        else:
            raise ValueError('`embedding` should expect 1 or 2 arguments.')
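As a usage sketch of the arguments described in the docstring (the batch size and special-token ids are placeholders): `start_tokens` is an `int32` vector of shape `[batch_size]` and `end_token` is an `int32` scalar.

import tensorflow as tf

batch_size = 32        # illustrative
GO_ID, EOS_ID = 1, 2   # illustrative special-token ids

start_tokens = tf.fill([batch_size], GO_ID)   # int32 vector [batch_size]
end_token = EOS_ID                            # int32 scalar

These would be passed, together with an embedding table or a one-/two-argument callable as in Example 1, to the helper whose initializer is shown above.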
Example 5
    def _get_opt(learning_rate=None):
        """Instantiates the optimizer, forwarding `learning_rate` only if the
        optimizer's constructor accepts an argument of that name."""
        opt_kwargs = hparams["kwargs"].todict()
        fn_args = set(utils.get_args(opt_class.__init__))
        if 'learning_rate' in fn_args and learning_rate is not None:
            opt_kwargs["learning_rate"] = learning_rate
        return opt_class(**opt_kwargs)
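The pattern in `_get_opt` (inspect the constructor's parameter names before forwarding `learning_rate`) can be reproduced with the standard `inspect` module; `utils.get_args` is assumed to behave similarly, and the helper name below is hypothetical:

import inspect

import tensorflow as tf

def build_optimizer(opt_class, opt_kwargs, learning_rate=None):
    # Forward `learning_rate` only if the constructor accepts a parameter
    # with that name.
    fn_args = set(inspect.signature(opt_class.__init__).parameters)
    if 'learning_rate' in fn_args and learning_rate is not None:
        opt_kwargs = dict(opt_kwargs, learning_rate=learning_rate)
    return opt_class(**opt_kwargs)

# tf.train.AdamOptimizer accepts `learning_rate`, so it is forwarded.
opt = build_optimizer(tf.train.AdamOptimizer, {"beta1": 0.9}, learning_rate=1e-3)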