    def _build(self, inputs, seq_len=None, gamma=None):
        """Optionally embeds the inputs, applies a gated (masked) attention
        over time steps if configured, then runs the convolutional layers
        with max-over-time pooling and projects the pooled features to
        logits.
        """
        if self._hparams.use_embedding:
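            # Rank-2 inputs are token ids looked up in the embedding table;
            # rank-3 inputs are soft distributions over the vocabulary that
            # are mixed with the embedding matrix via matmul.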
            if inputs.get_shape().ndims == 2:
                inputs = tf.nn.embedding_lookup(self._embedding, inputs)
            elif inputs.get_shape().ndims == 3:
                inputs_shape = inputs.get_shape()
                inputs = tf.matmul(tf.reshape(inputs, [-1, inputs_shape[-1]]),
                                   self._embedding)
                inputs = tf.reshape(
                    inputs, [inputs_shape[0], -1,
                             inputs.get_shape()[-1]])

        scores = tf.ones(tf.shape(inputs)[:2], tf.float32) \
                 / tf.cast(tf.shape(inputs)[1], tf.float32)

        if seq_len is not None:
            mask = tf.sequence_mask(lengths=tf.to_int32(seq_len),
                                    maxlen=tf.to_int32(tf.shape(inputs)[1]),
                                    dtype=tf.float32)
        else:
            mask = tf.ones(tf.shape(inputs)[:2], tf.float32)

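        # Gated attention: score each time step, softmax over the unmasked
        # positions, and reweight the inputs by the attention weights.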
        if self._hparams.use_gate:
            proj = tf.tanh(self._gate_proj(inputs))
            if gamma is None:
                gamma = 1.
            scores = tf.reduce_sum(self._gate_u * proj, [2]) / gamma
            scores = scores * mask + ((1.0 - mask) * tf.float32.min)
            scores = tf.nn.softmax(scores)
            if self._hparams.scale_attn:
                scores = scores * tf.reduce_sum(mask, axis=1, keep_dims=True)
            inputs = tf.expand_dims(scores, 2) * inputs
        else:
            inputs = tf.expand_dims(mask, 2) * inputs

        # Apply input dropout (keep probability taken from the hparams).
        inputs = tf.nn.dropout(inputs,
                               switch_dropout(self._hparams.input_keep_prob))

        pooled_outputs = []
        for conv_layer in self._conv_layers:
            h = conv_layer(inputs)
            h = tf.nn.leaky_relu(h, alpha=self._hparams.leaky_relu_alpha)
            # Max pooling over the time dimension after the convolution
            h = tf.reduce_max(h, axis=1)
            pooled_outputs.append(h)

        outputs = tf.concat(pooled_outputs, 1)
        outputs = tf.nn.dropout(outputs,
                                switch_dropout(self._hparams.output_keep_prob))

        logits = self._proj_layer(outputs)

        self._add_internal_trainable_variables()
        self._built = True

        return logits
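
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the example above): the gate branch pushes
# masked positions to tf.float32.min before the softmax so padding receives
# effectively zero attention weight. A minimal standalone reproduction of
# that masked softmax, with made-up values:
import tensorflow as tf

scores = tf.constant([[2.0, 1.0, 3.0]])          # [batch=1, max_time=3]
mask = tf.constant([[1.0, 1.0, 0.0]])            # last step is padding
masked_scores = scores * mask + (1.0 - mask) * tf.float32.min
attn = tf.nn.softmax(masked_scores)              # padding weight ~= 0
# ---------------------------------------------------------------------------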
Example 2
def get_rnn_cell(hparams=None, mode=None):
    """Creates an RNN cell.

    See :func:`~texar.core.default_rnn_cell_hparams` for all
    hyperparameters and default values.

    Args:
        hparams (dict or HParams, optional): Cell hyperparameters. Missing
            hyperparameters are set to default values.
        mode (optional): A Tensor taking value in
            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
            `TRAIN`, `EVAL`, and `PREDICT`. If `None`, dropout will be
            controlled by :func:`texar.global_mode`.

    Returns:
        A cell instance.

    Raises:
        ValueError: If hparams["num_layers"]>1 and hparams["type"] is a class
            instance.
        ValueError: If the resulting cell is not an
            :tf_main:`RNNCell <contrib/rnn/RNNCell>` instance.
    """
    if hparams is None or isinstance(hparams, dict):
        hparams = HParams(hparams, default_rnn_cell_hparams())

    d_hp = hparams["dropout"]
    if d_hp["variational_recurrent"] and \
            len(d_hp["input_size"]) != hparams["num_layers"]:
        raise ValueError(
            "If variational_recurrent=True, input_size must be a list of "
            "num_layers(%d) integers. Got len(input_size)=%d." %
            (hparams["num_layers"], len(d_hp["input_size"])))

    cells = []
    cell_kwargs = hparams["kwargs"].todict()
    num_layers = hparams["num_layers"]
    for layer_i in range(num_layers):
        # Create the basic cell
        cell_type = hparams["type"]
        if not is_str(cell_type) and not isinstance(cell_type, type):
            if num_layers > 1:
                raise ValueError(
                    "If 'num_layers'>1, then 'type' must be a cell class or "
                    "its name/module path, rather than a cell instance.")
        cell_modules = ['tensorflow.contrib.rnn', 'texar.custom']
        cell = utils.check_or_get_instance(cell_type, cell_kwargs,
                                           cell_modules, rnn.RNNCell)

        # Optionally add dropout
        if d_hp["input_keep_prob"] < 1.0 or \
                d_hp["output_keep_prob"] < 1.0 or \
                d_hp["state_keep_prob"] < 1.0:
            vr_kwargs = {}
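            # Variational recurrent dropout reuses the same dropout mask at
            # every time step and requires the per-layer input size and dtype.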
            if d_hp["variational_recurrent"]:
                vr_kwargs = {
                    "variational_recurrent": True,
                    "input_size": d_hp["input_size"][layer_i],
                    "dtype": tf.float32
                }
            input_keep_prob = switch_dropout(d_hp["input_keep_prob"], mode)
            output_keep_prob = switch_dropout(d_hp["output_keep_prob"], mode)
            state_keep_prob = switch_dropout(d_hp["state_keep_prob"], mode)
            cell = rnn.DropoutWrapper(cell=cell,
                                      input_keep_prob=input_keep_prob,
                                      output_keep_prob=output_keep_prob,
                                      state_keep_prob=state_keep_prob,
                                      **vr_kwargs)

        # Optionally add residual and highway connections
        if layer_i > 0:
            if hparams["residual"]:
                cell = rnn.ResidualWrapper(cell)
            if hparams["highway"]:
                cell = rnn.HighwayWrapper(cell)

        cells.append(cell)

    if hparams["num_layers"] > 1:
        cell = rnn.MultiRNNCell(cells)
    else:
        cell = cells[0]

    return cell
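
# ---------------------------------------------------------------------------
# Illustrative usage sketch (the hyperparameter values below are assumptions
# for illustration, not the library defaults): build a 2-layer LSTM cell with
# output dropout enabled at training time. `hparams` follows the structure
# documented above; omitted keys fall back to default_rnn_cell_hparams().
import tensorflow as tf

hparams = {
    "type": "LSTMCell",                   # resolved in tensorflow.contrib.rnn
    "kwargs": {"num_units": 256},
    "num_layers": 2,
    "dropout": {"output_keep_prob": 0.5},
}
cell = get_rnn_cell(hparams, mode=tf.estimator.ModeKeys.TRAIN)
# `cell` can then be used with, e.g., tf.nn.dynamic_rnn.
# ---------------------------------------------------------------------------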
Example 3
    def _build(self,
               memory=None,
               query=None,
               soft_memory=None,
               soft_query=None,
               mode=None,
               **kwargs):
        """Pass the :attr:`memory` and :attr:`query` through the memory network
        and return the :attr:`logits` after the final matrix.

        Only one of :attr:`memory` and :attr:`soft_memory` should be
        specified; they must not be given at the same time.

        Args:
            memory (optional): Memory used in A/C operations. By default, it
                should be an integer tensor of shape
                `[batch_size, memory_size]`,
                containing the ids to embed if provided.
            query (optional): Query vectors as the initial input of the memory
                network.
                If you'd like to apply some transformation (e.g., embedding)
                on it before it's fed into the network, please set `use_B` to
                True and add `query_embed_fn` when constructing this instance.
                If `query_embed_fn` is set to
                :meth:`~texar.modules.MemNetBase.get_default_embed_fn`,
                it should be of shape `[batch_size]`.
                If `use_B` is not set, it should be of shape
                `[batch_size, memory_dim]`.
            soft_memory (optional): Soft memory used in A/C operations. By
                default, it should be a tensor of shape
                `[batch_size, memory_size, raw_memory_dim]`,
                containing the weights used to mix the embedding vectors.
                If you'd like to apply a matrix multiplication on the memory,
                this option can also be used.
            soft_query (optional): Query vectors as the initial input of the
                memory network.
                If you'd like to apply some transformation (e.g., embedding)
                on it before it's fed into the network, please set `use_B` to
                True and add `query_embed_fn` when constructing this instance.
                Similar to :attr:`soft_memory`, if `query_embed_fn` is set to
                :meth:`~texar.modules.MemNetBase.get_default_embed_fn`,
                then it must be of shape `[batch_size, raw_memory_dim]`.
                Ignored if `use_B` is not set.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
                `TRAIN`, `EVAL`, and `PREDICT`. If `None`, dropout is
                controlled by :func:`texar.global_mode`.
        """
        if self._B is not None:

            def _unsqueeze(x):
                return x if x is None else tf.expand_dims(x, 1)

            query = tf.squeeze(
                self._B(_unsqueeze(query), _unsqueeze(soft_query), mode=mode),
                1)
        self._u = [query]
        self._m = self._A(memory, soft_memory, mode=mode)
        self._c = self._C(memory, soft_memory, mode=mode)

        keep_prob = switch_dropout(1 - self.hparams.dropout_rate, mode=mode)
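        # Variational dropout: sample one binary keep-mask up front and reuse
        # it across all hops, instead of resampling at every hop.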
        if self.hparams.variational:
            with tf.variable_scope("variational_dropout"):
                noise = tf.random_uniform(tf.shape(self._u[-1]))
                random_tensor = keep_prob + noise
                binary_tensor = tf.floor(random_tensor)

            def _variational_dropout(val):
                return tf.math.div(val, keep_prob) * binary_tensor

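        # Run the hops: each hop attends over the memory with the current
        # query state, optionally applies a (partial) ReLU, and applies
        # dropout before feeding the next hop.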
        for _ in range(self._n_hops):
            u_ = self._AC(self._u[-1], self._m, self._c)
            if self._relu_dim == 0:
                pass
            elif self._relu_dim == self._memory_dim:
                u_ = tf.nn.relu(u_)
            elif 0 < self._relu_dim < self._memory_dim:
                linear_part = u_[:, :self._memory_dim - self._relu_dim]
                relu_part = u_[:, self._memory_dim - self._relu_dim:]
                relued_part = tf.nn.relu(relu_part)
                u_ = tf.concat(axis=1, values=[linear_part, relued_part])
            else:
                raise ValueError("relu_dim = {} is illegal".format(
                    self._relu_dim))
            if self.hparams.variational:
                u_ = _variational_dropout(u_)
            else:
                u_ = tf.nn.dropout(u_, keep_prob)
            self._u.append(u_)

        logits = self._W(self._u[-1])

        if not self._built:
            self._add_internal_trainable_variables()
            self._built = True

        return logits
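
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the example above): the partial-ReLU branch
# keeps the first `memory_dim - relu_dim` units linear and applies ReLU only
# to the last `relu_dim` units. A standalone reproduction with made-up sizes:
import tensorflow as tf

memory_dim, relu_dim = 4, 2
u_ = tf.constant([[-1.0, 2.0, -3.0, 4.0]])              # [batch=1, memory_dim]
linear_part = u_[:, :memory_dim - relu_dim]             # [[-1., 2.]] unchanged
relu_part = tf.nn.relu(u_[:, memory_dim - relu_dim:])   # [[0., 4.]]
u_split = tf.concat([linear_part, relu_part], axis=1)   # [[-1., 2., 0., 4.]]
# ---------------------------------------------------------------------------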