Example #1
    def __call__(self, inputs, lengths=None, is_training=None):
        """

        :param inputs: [B, T, input_dim]
        :param lengths: [B]
        :return: [B, T, output_units]
        """
        with tf.variable_scope(self.name, reuse=self.reuse):
            h = inputs
            for i, layer in enumerate(self._hidden_layers):
                if isinstance(layer, (tuple, list)):
                    h = dense(inputs=h,
                              units=layer[0],
                              activation=layer[1],
                              dropout=self._dropout,
                              is_training=is_training)
                elif isinstance(layer, FeedForwardEncoder):
                    h = layer(h, lengths=lengths, is_training=is_training)
                else:
                    raise ValueError('Unknown type of layer: %s' % type(layer))
            outputs = dense(inputs=h,
                            units=self._num_units,
                            activation=self._activation_fn,
                            dropout=self._dropout,
                            is_training=is_training)
            return outputs
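All of these examples call a project-specific dense helper rather than
tf.layers.dense directly; its implementation is not shown on this page. A
minimal sketch of what it presumably looks like, assuming dropout is a drop
probability that is only applied when is_training is true:

import tensorflow as tf

def dense(inputs, units, activation=None, use_bias=True,
          dropout=None, is_training=None, name=None, reuse=None):
    # Hypothetical sketch: a plain affine layer followed by dropout that is
    # only active at training time.
    outputs = tf.layers.dense(inputs, units=units, activation=activation,
                              use_bias=use_bias, name=name, reuse=reuse)
    if dropout is not None and dropout > 0.:
        # tf.where on a scalar boolean picks 1 - dropout at training time
        # and 1. (no dropout) at test time
        keep_prob = tf.where(is_training, 1. - dropout, 1.)
        outputs = tf.nn.dropout(outputs, keep_prob=keep_prob)
    return outputs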
Example #2
def attention_logits(
        inputs,  # [B, M, dx]
        outputs,  # [B, N, dy]
        units,
        dropout=None,
        is_training=None,
        name='attention-layer',
        reuse=None):
    with tf.variable_scope(name, reuse=reuse):
        # [B, M, units]
        keys = dense(
            inputs=inputs,  # [B, M, dx]
            units=units,
            activation=None,
            name='keys',
            dropout=dropout,
            is_training=is_training)
        # [B, N, units]
        queries = dense(
            inputs=outputs,  # [B, N, dy]
            units=units,
            activation=None,
            name='queries',
            dropout=dropout,
            is_training=is_training)

        # logits of the Categorical distribution P(A_j | x_1^m, y_{<j}),
        # computed via dot product
        # [B, N, M]
        logits = tf.matmul(
            queries,  # [B, N, units]
            keys,  # [B, M, units]
            transpose_b=True)

    # [B, N, M]
    return logits
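A hypothetical usage sketch (shapes and names below are illustrative, not
from the source): scoring 7 source positions against 5 target positions.

src = tf.placeholder(tf.float32, [None, 7, 128])  # [B, M, dx]
tgt = tf.placeholder(tf.float32, [None, 5, 128])  # [B, N, dy]
training = tf.placeholder(tf.bool, [])
# [B, N, M]: one row of source scores per target position
logits = attention_logits(src, tgt, units=64, dropout=0.1,
                          is_training=training)
# softmax over the last axis turns the logits into attention weights
weights = tf.nn.softmax(logits, axis=-1)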
Example #3
def delta_tensor(shape,
                 inputs,
                 var_name,
                 activation=tf.identity,
                 initializer=None,
                 summary=False,
                 dropout=0.,
                 is_training=None):
    """
    Returns a tensor filled with `dim` parameters or predictions.
    :param shape: [batch_size, max_time, dim]
    :param inputs: if not None, then we predict a tensor [batch_size, max_time, dim]
        otherwise we tile `dim` parameters to make a [batch_size, max_time, dim] tensor
    :param var_name:
    :param activation:
    :param summary:
    :param dropout:
    :param is_training:
    :return:
    """
    dim = shape[-1]
    if inputs is None:
        logging.info('  %s = tensor(%s) with t a parameter', var_name,
                     pprint_activation(activation, 't'))
        # here we have `dim` parameters
        with tf.variable_scope(var_name):
            outputs = activation(
                tf.get_variable(name='pre_t',
                                shape=dim if initializer is None else None,
                                dtype=tf.float32,
                                initializer=initializer))
            expanded_shape = [1] * len(shape)
            expanded_shape[-1] = dim  # e.g. [1, 1, dim]
            copies = list(shape)
            copies[-1] = 1  # e.g. [B, T, 1]
            outputs = tf.tile(tf.reshape(outputs, expanded_shape), copies)
            if summary:
                tf.summary.histogram('parameter', outputs)
    else:
        logging.info('  %s = tensor(%s) with t a prediction', var_name,
                     pprint_activation(activation, 't'))
        # here we predict a tensor with shape `shape`
        with tf.variable_scope(var_name):
            outputs = dense(inputs=inputs,
                            units=dim,
                            activation=activation,
                            dropout=dropout,
                            is_training=is_training)
            if summary:
                tf.summary.histogram('prediction', outputs)
    return outputs
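Both branches can be exercised as below (a sketch; the variable names are
illustrative). With inputs=None a single learned vector is tiled across the
batch and time axes; otherwise a dense head predicts one vector per position.

# tiled-parameter branch: one learned vector of size 10 -> [32, 20, 10]
b = delta_tensor(shape=[32, 20, 10], inputs=None, var_name='bias_term',
                 activation=tf.nn.softplus)
# prediction branch: a dense head over hidden states h of shape [32, 20, 64]
h = tf.placeholder(tf.float32, [32, 20, 64])
training = tf.placeholder(tf.bool, [])
p = delta_tensor(shape=[32, 20, 10], inputs=h, var_name='pred_term',
                 activation=tf.nn.softplus, dropout=0.1, is_training=training)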
Example #4
    def __call__(self, inputs, lengths=None, is_training=None):
        """

        :param inputs: [B, T, input_dim]
        :param lengths: [B]
        :return: [B, T, output_units]
        """
        with tf.variable_scope(self.name, reuse=self.reuse):
            outputs = dense(inputs=inputs,
                            units=self._num_units,
                            activation=self._activation_fn,
                            use_bias=self._use_bias,
                            dropout=self._dropout,
                            is_training=is_training)
            return outputs
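This variant is a thin wrapper around a single projection, and its signature
matches the FeedForwardEncoder layers accepted in Example #1. A usage sketch,
assuming a hypothetical constructor that stores the units and activation:

# hypothetical constructor arguments; the actual class is not shown here
layer = FeedForwardEncoder(num_units=256, activation=tf.nn.relu)
x = tf.placeholder(tf.float32, [None, None, 128])  # [B, T, input_dim]
training = tf.placeholder(tf.bool, [])
y = layer(x, is_training=training)  # [B, T, 256]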
Example #5
def self_attention_layer(inputs,
                         num_steps,
                         units,
                         dropout=None,
                         is_training=None,
                         activation=tf.nn.softmax,
                         mask_diagonal=False,
                         mask_value=float('-inf'),
                         name='SelfAttention',
                         reuse=None):
    """
    Compute self attention levels (masking invalid positions).

    :param inputs: [batch_size, max_time, dim]
    :param num_steps: number of steps per training instance [batch_size]
    :param units: number of query/key units
    :param activation: defaults to tf.nn.softmax for normalised attention
    :param mask_diagonal: defaults to False
    :param mask_value: defaults to -inf
    :param name: defaults to SelfAttention
    :param reuse: passed to tf layers (defaults to None)
    :return: [batch_size, max_time, max_time]
    """
    batch_size = tf.shape(inputs)[0]  # B
    longest = tf.shape(inputs)[1]  # M
    with tf.variable_scope(name):
        # [B, M, d]
        queries = dense(inputs,
                        units=units,
                        dropout=dropout,
                        is_training=is_training,
                        name='queries',
                        reuse=reuse)
        keys = dense(inputs,
                     units=units,
                     dropout=dropout,
                     is_training=is_training,
                     name='keys',
                     reuse=reuse)
        # [B, M, M]
        scores = tf.matmul(
            queries,  # [B, M, d]
            keys,  # [B, M, d]
            transpose_b=True)
        # mask invalid logits
        # [B, M, M]
        condition = tf.tile(
            # make the boolean mask [B, 1, M]
            tf.expand_dims(
                # get a boolean mask [B, M]
                tf.sequence_mask(num_steps, maxlen=longest),
                1),
            [1, longest, 1])
        scores = tf.where(
            # make the boolean mask [B, M, M]
            condition=condition,
            x=scores,
            y=tf.ones(shape=[batch_size, longest, longest]) * mask_value)
        # mask diagonal
        if mask_diagonal:
            scores += tf.diag(tf.fill([tf.shape(scores)[-1]], mask_value))
        # Normalise attention
        # [B, M, M]
        # note: rows for padded query positions are fully masked, so the
        # softmax there is degenerate; to force exact zeros at those rows,
        # wrap the result in
        # tf.where(condition, activation(scores), tf.zeros_like(scores))
        return activation(scores)
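A short usage sketch: with num_steps shorter than max_time, each valid query
row puts all of its softmax mass on valid key positions only.

x = tf.placeholder(tf.float32, [None, None, 32])  # [B, M, dim]
steps = tf.placeholder(tf.int32, [None])  # [B]
training = tf.placeholder(tf.bool, [])
# [B, M, M]; row j holds the attention of position j over all valid
# positions, with the diagonal excluded because mask_diagonal=True
attn = self_attention_layer(x, num_steps=steps, units=16,
                            dropout=0.1, is_training=training,
                            mask_diagonal=True)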
Example #6
    def build(self, inputs, lengths=None, is_training=None):
        """

        :param inputs: [B, T, input_dim]
        :param lengths: [B]
        :return: outputs [B, T, output_units], states [B, T, output_units]
        """
        if self._cell_type == 'lstm':
            cell_class = tf.contrib.rnn.BasicLSTMCell
        elif self._cell_type == 'gru':
            cell_class = tf.contrib.rnn.GRUCell
        else:
            raise ValueError('Unknown cell_type=%s' % self._cell_type)

        with tf.variable_scope(self.name, reuse=self.reuse):
            # decide how many units per cell
            if self._merge_strategy == 'half':
                num_units = self._num_units // 2
            else:
                num_units = self._num_units
            cell_fw = cell_class(num_units=num_units)
            cell_bw = cell_class(num_units=num_units)
            if self._dropout > 0.:
                if is_training is None:
                    raise ValueError('With dropout I require a training flag')
                keep_prob = 1. - self._dropout
                # input_size must be the depth of the inputs (it is only
                # consulted when input_keep_prob < 1), not the lengths
                cell_fw = tf.contrib.rnn.DropoutWrapper(
                    cell_fw,
                    output_keep_prob=tf.where(is_training, keep_prob, 1.),
                    state_keep_prob=tf.where(is_training, keep_prob, 1.),
                    variational_recurrent=self._variational_recurrent,
                    dtype=inputs.dtype,
                    input_size=inputs.shape[-1:]  # input depth
                )
                cell_bw = tf.contrib.rnn.DropoutWrapper(
                    cell_bw,
                    output_keep_prob=tf.where(is_training, keep_prob, 1.),
                    state_keep_prob=tf.where(is_training, keep_prob, 1.),
                    variational_recurrent=self._variational_recurrent,
                    dtype=inputs.dtype,
                    input_size=inputs.shape[-1:]  # input depth
                )

            with tf.variable_scope(
                    dynamic_rnn_scope_name(self._dropout,
                                           self._variational_recurrent)):
                (outputs_fw,
                 outputs_bw), (states_fw,
                               states_bw) = tf.nn.bidirectional_dynamic_rnn(
                                   cell_fw=cell_fw,
                                   cell_bw=cell_bw,
                                   inputs=inputs,
                                   sequence_length=lengths,
                                   dtype=tf.float32)

            if self._merge_strategy == 'sum':
                # [B, T, num_units]
                # note: elementwise summing/concatenating the final states
                # assumes GRU-style single-tensor states; LSTM cells return
                # LSTMStateTuples
                outputs = tf.add(outputs_fw, outputs_bw)
                states = tf.add(states_fw, states_bw)
            else:
                # [B, T, num_units * 2]
                outputs = tf.concat([outputs_fw, outputs_bw], -1)
                states = tf.concat([states_fw, states_bw], -1)

            if self._merge_strategy == 'project':
                # num_units * 2 => output_units
                outputs = dense(inputs=outputs,
                                units=self.output_units,
                                activation=None,
                                use_bias=False,
                                dropout=self._ff_dropout,
                                is_training=is_training)

            if self._residual:
                outputs += inputs

            return outputs, states
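The three merge strategies, sketched on toy tensors (an illustration, not
from the source): 'half' builds each direction with num_units // 2 so that
concatenation lands back at num_units, while 'project' maps the concatenated
2 * num_units output back down with a bias-free dense layer.

fw = tf.ones([2, 5, 4])  # forward outputs  [B, T, num_units]
bw = tf.ones([2, 5, 4])  # backward outputs [B, T, num_units]
summed = tf.add(fw, bw)  # 'sum': stays [2, 5, 4]
concat = tf.concat([fw, bw], -1)  # 'concat': grows to [2, 5, 8]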
Example #7
    def __call__(self, inputs, lengths=None, is_training=None):
        """

        :param inputs: [B, T, input_units]
        :param lengths: [B]
        :return: outputs=inputs [B, T, output_units=input_units]
        """

        if lengths is None:
            raise ValueError('I need lengths to figure out what is the past')

        memory = self.padleft(inputs)
        lengthsp1 = lengths + 1
        batch_size = tf.shape(memory)[0]
        longestp1 = tf.shape(memory)[1]

        with tf.variable_scope(self.name, reuse=self.reuse):
            # [B, T, d]
            queries = dense(inputs=memory,
                            units=self.output_units,
                            use_bias=False,
                            dropout=self._dropout,
                            is_training=is_training,
                            name='queries',
                            reuse=self.reuse)
            keys = memory
            # [B, T, T]
            scores = tf.matmul(
                queries,  # [B, T, d]
                keys,  # [B, T, d]
                transpose_b=True)
            # mask invalid logits
            neg_inf = tf.fill([batch_size, longestp1, longestp1],
                              float('-inf'))
            scores = tf.where(
                # make the boolean mask [B, T, T]
                condition=tf.tile(
                    # make the boolean mask [B, 1, T]
                    tf.expand_dims(
                        tf.sequence_mask(lengthsp1, maxlen=longestp1), 1),
                    [1, longestp1, 1]),
                x=scores,
                y=neg_inf)
            # keep only the past: a lower-triangular mask (including the
            # diagonal) over the left-padded memory
            # [B, T, T]
            past_mask = tf.cast(
                fill_triangular(
                    tf.ones([batch_size, longestp1 * (longestp1 + 1) // 2])),
                dtype=tf.bool)
            scores = tf.where(
                condition=past_mask,
                x=scores,
                y=neg_inf)
            # [B, T, T]
            adjacencies = tf.nn.softmax(scores)
            outputs = tf.matmul(
                adjacencies,  # [B, T, T]
                memory  # [B, T, d]
            )
            outputs = self.trimright(outputs)

        return outputs
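padleft and trimright are not shown on this page; one sketch consistent with
the lengths + 1 bookkeeping above (an assumption, not the actual helpers) is
to prepend a zero step on the left and drop the extra final step, so that the
query at position t only ever sees strictly earlier original positions:

def padleft(inputs):
    # hypothetical: prepend one zero timestep -> [B, T + 1, d]
    return tf.pad(inputs, [[0, 0], [1, 0], [0, 0]])

def trimright(outputs):
    # hypothetical: drop the extra final timestep -> [B, T, d]
    return outputs[:, :-1, :]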