Example #1
    def _build(self, incoming, state, *args, **kwargs):
        """Gated recurrent unit (GRU) with nunits cells."""
        with get_variable_scope('Gates'):  # Reset gate and update gate.
            weights_init = getters.get_initializer(self.weights_init)
            # We start with bias of 1.0 to not reset and not update.
            r, u = array_ops.split(
                axis=1, num_or_size_splits=2,
                value=_linear([incoming, state], 2 * self._num_units, True, 1.0,
                              weights_init, self.trainable, self.restore))
            inner_activation = getters.get_activation(self.inner_activation)
            r, u = inner_activation(r), inner_activation(u)
        with get_variable_scope('Candidate'):
            activation = getters.get_activation(self.activation)
            c = activation(
                _linear([incoming, r * state], self._num_units, True, 0.,
                        weights_init, self.trainable, self.restore))
        new_h = u * state + (1 - u) * c

        self._w, self._b = list(), list()
        # Retrieve RNN Variables
        with get_variable_scope(scope='Gates/Linear', reuse=True):
            self._w.append(tf.get_variable('w'))
            self._b.append(tf.get_variable('b'))
        with get_variable_scope(scope='Candidate/Linear', reuse=True):
            self._w.append(tf.get_variable('w'))
            self._b.append(tf.get_variable('b'))

        return new_h, new_h
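
For reference, the cell above is the standard GRU update. A minimal NumPy sketch of the same arithmetic (illustrative only, not library code; it assumes sigmoid for inner_activation and tanh for activation, with w_gates/b_gates and w_cand/b_cand standing in for the 'w'/'b' variables created by _linear):

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x, h, w_gates, b_gates, w_cand, b_cand):
    # One matmul over [x, h] produces both gates, which are then split into
    # reset (r) and update (u), mirroring the single _linear call followed by
    # array_ops.split above.
    gates = _sigmoid(np.concatenate([x, h], axis=1) @ w_gates + b_gates)
    r, u = np.split(gates, 2, axis=1)
    # The candidate is computed from the input and the reset-gated state.
    c = np.tanh(np.concatenate([x, r * h], axis=1) @ w_cand + b_cand)
    # Same convention as above: u keeps the old state, (1 - u) admits the candidate.
    return u * h + (1 - u) * c
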
Example #2
    def _build(self, incoming, state, *args, **kwargs):
        """Gated recurrent unit (GRU) with nunits cells."""
        with get_variable_scope('Gates'):  # Reset gate and update gate.
            weights_init = getters.get_initializer(self.weights_init)
            # We start with bias of 1.0 to not reset and not update.
            r, u = array_ops.split(axis=1,
                                   num_or_size_splits=2,
                                   value=_linear([incoming, state],
                                                 2 * self._num_units, True,
                                                 1.0, weights_init,
                                                 self.trainable, self.restore))
            inner_activation = getters.get_activation(self.inner_activation)
            r, u = inner_activation(r), inner_activation(u)
        with get_variable_scope('Candidate'):
            activation = getters.get_activation(self.activation)
            c = activation(
                _linear([incoming, r * state], self._num_units, True, 0.,
                        weights_init, self.trainable, self.restore))
        new_h = u * state + (1 - u) * c

        self._w, self._b = list(), list()
        # Retrieve RNN Variables
        with get_variable_scope(scope='Gates/Linear', reuse=True):
            self._w.append(tf.get_variable('w'))
            self._b.append(tf.get_variable('b'))
        with get_variable_scope(scope='Candidate/Linear', reuse=True):
            self._w.append(tf.get_variable('w'))
            self._b.append(tf.get_variable('b'))

        return new_h, new_h
Example #3
    def _build(self, inputs, state, *args, **kwargs):
        """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
        weights_init = getters.get_initializer(self.weights_init)
        output = getters.get_activation(self.activation)(
            _linear([inputs, state], self.num_units, True, 0., weights_init,
                    self.trainable, self.restore))
        # Retrieve RNN Variables
        with get_variable_scope(name='Linear', reuse=True):
            self._w = tf.get_variable(name='w')
            self._b = tf.get_variable(name='b')

        return output, output
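
Numerically this cell is just the one-line equation from the docstring. A small NumPy sketch of a single step (an illustration only, assuming tanh as the activation):

import numpy as np

def basic_rnn_step(x, h, w, b):
    # _linear concatenates [inputs, state] and applies a single weight matrix,
    # so the W and U of the docstring live side by side inside w.
    return np.tanh(np.concatenate([x, h], axis=1) @ w + b)
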
Example #4
    def _build(self, inputs, state, *args, **kwargs):
        """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
        weights_init = getters.get_initializer(self.weights_init)
        output = getters.get_activation(self.activation)(
            _linear([inputs, state], self.num_units, True, 0.,
                    weights_init, self.trainable, self.restore))
        # Retrieve RNN Variables
        with get_variable_scope(name='Linear', reuse=True):
            self._w = tf.get_variable(name='w')
            self._b = tf.get_variable(name='b')

        return output, output
Example #5
def _linear(args, output_size, bias, bias_start=0.0, weights_init=None,
            trainable=True, restore=True, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        bias: boolean, whether to add a bias term or not.
        bias_start: starting value to initialize the bias; 0 by default.
        weights_init: initializer for the weight matrix w.
        trainable: boolean, whether the created variables are trainable.
        restore: boolean, whether the created variables should be restored.
        scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
        ValueError: if any of the arguments has an unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError('`args` must be specified')
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError('Linear is expecting 2D arguments: %s' % str(shapes))
        if not shape[1]:
            raise ValueError('Linear expects shape[1] of arguments: %s' % str(shapes))
        else:
            total_arg_size += shape[1]

    # Now the computation.
    with get_variable_scope(scope or 'Linear'):
        _w = variable(name='w', shape=[total_arg_size, output_size], initializer=weights_init,
                      trainable=trainable, restore=restore)
        if len(args) == 1:
            res = tf.matmul(a=args[0], b=_w)
        else:
            res = tf.matmul(a=array_ops.concat(values=args, axis=1), b=_w)
        if not bias:
            return res
        _b = variable(name='b', shape=[output_size],
                      initializer=tf.constant_initializer(bias_start),
                      trainable=trainable, restore=restore)
    return res + _b
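
In plain NumPy terms the map computed by _linear (with bias=True) is a concat, a matmul and a bias add. An illustrative sketch, not library code: for two args of widths 3 and 5 and output_size=4, the created w has shape [8, 4] and b has shape [4]:

import numpy as np

batch = 2
x = np.random.randn(batch, 3)
h = np.random.randn(batch, 5)
w = np.random.randn(3 + 5, 4)                 # [total_arg_size, output_size]
b = np.full(4, 0.0)                           # initialized to bias_start
y = np.concatenate([x, h], axis=1) @ w + b    # shape (2, 4)
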
Example #6
def _linear(args, output_size, bias, bias_start=0.0, weights_init=None,
            trainable=True, restore=True, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        bias: boolean, whether to add a bias term or not.
        bias_start: starting value to initialize the bias; 0 by default.
        weights_init: initializer for the weight matrix w.
        trainable: boolean, whether the created variables are trainable.
        restore: boolean, whether the created variables should be restored.
        scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
        ValueError: if any of the arguments has an unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError('`args` must be specified')
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError('Linear is expecting 2D arguments: %s' % str(shapes))
        if not shape[1]:
            raise ValueError('Linear expects shape[1] of arguments: %s' % str(shapes))
        else:
            total_arg_size += shape[1]

    # Now the computation.
    with get_variable_scope(scope or 'Linear'):
        _w = variable(name='w', shape=[total_arg_size, output_size], initializer=weights_init,
                      trainable=trainable, restore=restore)
        if len(args) == 1:
            res = tf.matmul(a=args[0], b=_w)
        else:
            res = tf.matmul(a=array_ops.concat(values=args, axis=1), b=_w)
        if not bias:
            return res
        _b = variable(name='b', shape=[output_size],
                      initializer=tf.constant_initializer(bias_start),
                      trainable=trainable, restore=restore)
    return res + _b
Example #7
    def _build(self, incoming, state, *args, **kwargs):
        """Long short-term memory cell (LSTM)."""
        self._declare_dependencies()
        activation = getters.get_activation(self.activation)
        inner_activation = getters.get_activation(self.inner_activation)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(axis=1, num_or_size_splits=2, value=state)
        concat = _linear([incoming, h], 4 * self._num_units, True, 0.,
                         self.weights_init, self.trainable, self.restore)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(axis=1,
                                     num_or_size_splits=4,
                                     value=concat)

        # apply batch normalization to inner state and gates
        if self.batch_norm:
            i = self._batch_norm_i(i)
            j = self._batch_norm_j(j)
            f = self._batch_norm_f(f)
            o = self._batch_norm_o(o)

        new_c = (c * inner_activation(f + self._forget_bias) +
                 inner_activation(i) * activation(j))

        # hidden-to-hidden batch normalization
        if self.batch_norm:
            batch_norm_new_c = self._batch_norm_c(new_c)
            new_h = activation(batch_norm_new_c) * inner_activation(o)
        else:
            new_h = activation(new_c) * inner_activation(o)

        if self._state_is_tuple:
            new_state = rnn.LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat(values=[new_c, new_h], axis=1)

        # Retrieve RNN Variables
        with get_variable_scope(scope='Linear', reuse=True):
            self._w = tf.get_variable('w')
            self._b = tf.get_variable('b')

        return new_h, new_state
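
Setting batch normalization aside, the cell body above is the standard LSTM update. A NumPy sketch of one step (illustrative only; it assumes sigmoid for inner_activation and tanh for activation, with w/b standing in for the variables created by _linear):

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, c, h, w, b, forget_bias=1.0):
    # One matmul over [incoming, h], then split into input (i), new input (j),
    # forget (f) and output (o) blocks, as in the code above.
    i, j, f, o = np.split(np.concatenate([x, h], axis=1) @ w + b, 4, axis=1)
    new_c = c * _sigmoid(f + forget_bias) + _sigmoid(i) * np.tanh(j)
    new_h = np.tanh(new_c) * _sigmoid(o)
    return new_c, new_h
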
Example #8
    def _build(self, incoming, state, *args, **kwargs):
        """Long short-term memory cell (LSTM)."""
        self._declare_dependencies()
        activation = getters.get_activation(self.activation)
        inner_activation = getters.get_activation(self.inner_activation)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(axis=1, num_or_size_splits=2, value=state)
        concat = _linear(
            [incoming, h], 4 * self._num_units, True, 0., self.weights_init,
            self.trainable, self.restore)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(axis=1, num_or_size_splits=4, value=concat)

        # apply batch normalization to inner state and gates
        if self.batch_norm:
            i = self._batch_norm_i(i)
            j = self._batch_norm_j(j)
            f = self._batch_norm_f(f)
            o = self._batch_norm_o(o)

        new_c = (c * inner_activation(f + self._forget_bias) + inner_activation(i) * activation(j))

        # hidden-to-hidden batch normalization
        if self.batch_norm:
            batch_norm_new_c = self._batch_norm_c(new_c)
            new_h = activation(batch_norm_new_c) * inner_activation(o)
        else:
            new_h = activation(new_c) * inner_activation(o)

        if self._state_is_tuple:
            new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat(values=[new_c, new_h], axis=1)

        # Retrieve RNN Variables
        with get_variable_scope(scope='Linear', reuse=True):
            self._w = tf.get_variable('w')
            self._b = tf.get_variable('b')

        return new_h, new_state
Example #9
 def _build(self, incoming, state, *args, **kwargs):
     """Run this multi-layer cell on inputs, starting from state."""
     cur_state_pos = 0
     cur_inp = incoming
     new_states = []
     for i, cell in enumerate(self._cells):
         with get_variable_scope("cell_{}".format(i)):
             if self._state_is_tuple:
                 if not nest.is_sequence(state):
                     raise ValueError(
                         "Expected state to be a tuple of length %d, but received: {}".format(
                             len(self.state_size), state))
                 cur_state = state[i]
             else:
                 cur_state = array_ops.slice(state, [0, cur_state_pos], [-1, cell.state_size])
                 cur_state_pos += cell.state_size
             cur_inp, new_state = cell(cur_inp, cur_state)
             new_states.append(new_state)
     new_states = (tuple(new_states) if self._state_is_tuple else
                   array_ops.concat(values=new_states, axis=1))
     return cur_inp, new_states
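
Conceptually the loop only threads each layer's output into the next layer while collecting the per-layer states. A minimal sketch of the same control flow using plain Python callables (each taking (input, state) and returning (output, new_state)):

def multi_cell_step(cells, x, states):
    # cells is a list of callables and states a matching list of per-layer states.
    new_states = []
    for cell, state in zip(cells, states):
        x, new_state = cell(x, state)   # the output of layer i feeds layer i + 1
        new_states.append(new_state)
    return x, tuple(new_states)
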
Example #10
 def _build(self, incoming, state, *args, **kwargs):
     """Run this multi-layer cell on inputs, starting from state."""
     cur_state_pos = 0
     cur_inp = incoming
     new_states = []
     for i, cell in enumerate(self._cells):
         with get_variable_scope("cell_{}".format(i)):
             if self._state_is_tuple:
                 if not nest.is_sequence(state):
                     raise ValueError(
                         "Expected state to be a tuple of length %d, but received: {}".format(
                             len(self.state_size), state))
                 cur_state = state[i]
             else:
                 cur_state = array_ops.slice(state, [0, cur_state_pos], [-1, cell.state_size])
                 cur_state_pos += cell.state_size
             cur_inp, new_state = cell(cur_inp, cur_state)
             new_states.append(new_state)
     new_states = (tuple(new_states) if self._state_is_tuple else
                   array_ops.concat(values=new_states, axis=1))
     return cur_inp, new_states