def _build(self, incoming, state, *args, **kwargs):
    """Gated recurrent unit (GRU) with num_units cells."""
    with get_variable_scope('Gates'):
        # Reset gate and update gate.
        weights_init = getters.get_initializer(self.weights_init)
        # Start with a bias of 1.0 so the gates neither reset nor update at first.
        r, u = array_ops.split(
            axis=1, num_or_size_splits=2,
            value=_linear([incoming, state], 2 * self._num_units, True, 1.0,
                          weights_init, self.trainable, self.restore))
        inner_activation = getters.get_activation(self.inner_activation)
        r, u = inner_activation(r), inner_activation(u)
    with get_variable_scope('Candidate'):
        activation = getters.get_activation(self.activation)
        c = activation(
            _linear([incoming, r * state], self._num_units, True, 0.,
                    weights_init, self.trainable, self.restore))
    new_h = u * state + (1 - u) * c

    self._w, self._b = list(), list()
    # Retrieve the RNN variables created by `_linear` above.
    with get_variable_scope(scope='Gates/Linear', reuse=True):
        self._w.append(tf.get_variable('w'))
        self._b.append(tf.get_variable('b'))
    with get_variable_scope(scope='Candidate/Linear', reuse=True):
        self._w.append(tf.get_variable('w'))
        self._b.append(tf.get_variable('b'))
    return new_h, new_h
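# A minimal NumPy sketch of the GRU step computed by `_build` above, assuming a
# sigmoid inner activation and a tanh candidate activation (assumptions; the
# actual functions come from self.inner_activation / self.activation). The
# helper name and weight arguments are hypothetical, not part of this module.
def _gru_reference_step(x, h, w_gates, b_gates, w_cand, b_cand):
    """Hypothetical pure-NumPy reference for one GRU step.

    x: [batch, n_in], h: [batch, n_units], w_gates: [n_in + n_units, 2 * n_units],
    w_cand: [n_in + n_units, n_units].
    """
    import numpy as np
    gates = 1.0 / (1.0 + np.exp(-(np.concatenate([x, h], axis=1).dot(w_gates) + b_gates)))
    r, u = np.split(gates, 2, axis=1)                                     # reset gate, update gate
    c = np.tanh(np.concatenate([x, r * h], axis=1).dot(w_cand) + b_cand)  # candidate state
    return u * h + (1.0 - u) * c                                          # new_h, also the output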
def _build(self, inputs, state, *args, **kwargs):
    """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
    weights_init = getters.get_initializer(self.weights_init)
    output = getters.get_activation(self.activation)(
        _linear([inputs, state], self.num_units, True, 0.,
                weights_init, self.trainable, self.restore))
    # Retrieve the RNN variables created by `_linear` above.
    with get_variable_scope(name='Linear', reuse=True):
        self._w = tf.get_variable(name='w')
        self._b = tf.get_variable(name='b')
    return output, output
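# A minimal NumPy sketch of the vanilla RNN step above, assuming a tanh
# activation (an assumption; the real one comes from self.activation). The
# helper name and weight arguments are hypothetical.
def _basic_rnn_reference_step(x, h, w, b):
    """Hypothetical reference: output = new_state = tanh([x, h] . w + b)."""
    import numpy as np
    out = np.tanh(np.concatenate([x, h], axis=1).dot(w) + b)
    return out, out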
def _linear(args, output_size, bias, bias_start=0.0, weights_init=None,
            trainable=True, restore=True, scope=None):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
        args: a 2D Tensor or a list of 2D, batch x n, Tensors.
        output_size: int, second dimension of W[i].
        bias: boolean, whether to add a bias term or not.
        bias_start: starting value to initialize the bias; 0 by default.
        weights_init: initializer for the weight matrices W[i].
        trainable: boolean, whether the created variables are trainable.
        restore: boolean, whether the created variables are restored from checkpoints.
        scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
        A 2D Tensor with shape [batch x output_size] equal to
        sum_i(args[i] * W[i]), where the W[i]s are newly created matrices.

    Raises:
        ValueError: if any of the arguments has an unspecified or wrong shape.
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError('`args` must be specified')
    if not nest.is_sequence(args):
        args = [args]

    # Calculate the total size of the arguments along dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError('Linear is expecting 2D arguments: %s' % str(shapes))
        if not shape[1]:
            raise ValueError('Linear expects shape[1] of arguments: %s' % str(shapes))
        else:
            total_arg_size += shape[1]

    # Now the computation.
    with get_variable_scope(scope or 'Linear'):
        _w = variable(name='w', shape=[total_arg_size, output_size],
                      initializer=weights_init, trainable=trainable, restore=restore)
        if len(args) == 1:
            res = tf.matmul(a=args[0], b=_w)
        else:
            res = tf.matmul(a=array_ops.concat(values=args, axis=1), b=_w)
        if not bias:
            return res
        _b = variable(name='b', shape=[output_size],
                      initializer=tf.constant_initializer(bias_start),
                      trainable=trainable, restore=restore)
    return res + _b
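# What `_linear` computes, expressed as a hypothetical NumPy sketch that ignores
# the variable-scope and variable-creation bookkeeping: the args are concatenated
# along axis 1 and multiplied by a single weight matrix, plus an optional bias.
# The helper name is hypothetical.
def _linear_reference(args, w, b=None):
    """Hypothetical reference: concat(args, axis=1) . w (+ b)."""
    import numpy as np
    res = np.concatenate(args, axis=1).dot(w)
    return res if b is None else res + b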
def _build(self, incoming, state, *args, **kwargs):
    """Long short-term memory cell (LSTM)."""
    self._declare_dependencies()
    activation = getters.get_activation(self.activation)
    inner_activation = getters.get_activation(self.inner_activation)
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
        c, h = state
    else:
        c, h = array_ops.split(axis=1, num_or_size_splits=2, value=state)

    concat = _linear([incoming, h], 4 * self._num_units, True, 0.,
                     self.weights_init, self.trainable, self.restore)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(axis=1, num_or_size_splits=4, value=concat)

    # Apply batch normalization to the inner state and gates.
    if self.batch_norm:
        i = self._batch_norm_i(i)
        j = self._batch_norm_j(j)
        f = self._batch_norm_f(f)
        o = self._batch_norm_o(o)

    new_c = (c * inner_activation(f + self._forget_bias) +
             inner_activation(i) * activation(j))

    # Hidden-to-hidden batch normalization.
    if self.batch_norm:
        batch_norm_new_c = self._batch_norm_c(new_c)
        new_h = activation(batch_norm_new_c) * inner_activation(o)
    else:
        new_h = activation(new_c) * inner_activation(o)

    if self._state_is_tuple:
        new_state = rnn.LSTMStateTuple(new_c, new_h)
    else:
        new_state = tf.concat(values=[new_c, new_h], axis=1)

    # Retrieve the RNN variables created by `_linear` above.
    with get_variable_scope(scope='Linear', reuse=True):
        self._w = tf.get_variable('w')
        self._b = tf.get_variable('b')
    return new_h, new_state
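# A minimal NumPy sketch of the LSTM update above with batch_norm disabled,
# assuming a sigmoid inner activation, a tanh activation, and forget_bias=1.0
# (all assumptions; the real values come from self.inner_activation,
# self.activation, and self._forget_bias). The helper name is hypothetical.
def _lstm_reference_step(x, c, h, w, b, forget_bias=1.0):
    """Hypothetical pure-NumPy reference for one LSTM step (w: [n_in + n_units, 4 * n_units])."""
    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    concat = np.concatenate([x, h], axis=1).dot(w) + b
    i, j, f, o = np.split(concat, 4, axis=1)          # input, new input, forget, output
    new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
    new_h = np.tanh(new_c) * sigmoid(o)
    return new_h, (new_c, new_h)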
def _build(self, incoming, state, *args, **kwargs):
    """Run this multi-layer cell on inputs, starting from state."""
    cur_state_pos = 0
    cur_inp = incoming
    new_states = []
    for i, cell in enumerate(self._cells):
        with get_variable_scope("cell_{}".format(i)):
            if self._state_is_tuple:
                if not nest.is_sequence(state):
                    raise ValueError(
                        "Expected state to be a tuple of length {}, "
                        "but received: {}".format(len(self.state_size), state))
                cur_state = state[i]
            else:
                cur_state = array_ops.slice(state, [0, cur_state_pos],
                                            [-1, cell.state_size])
                cur_state_pos += cell.state_size
            cur_inp, new_state = cell(cur_inp, cur_state)
            new_states.append(new_state)
    new_states = (tuple(new_states) if self._state_is_tuple
                  else array_ops.concat(values=new_states, axis=1))
    return cur_inp, new_states
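# A hypothetical sketch of how the loop above threads state through stacked
# cells when _state_is_tuple is True: each cell is any callable with an
# (input, state) -> (output, new_state) signature, and the output of one layer
# becomes the input of the next. The helper name is hypothetical.
def _multi_cell_reference_step(cells, x, states):
    """Hypothetical reference for one step of a stacked (multi-layer) cell."""
    new_states = []
    cur_inp = x
    for cell, cur_state in zip(cells, states):
        cur_inp, new_state = cell(cur_inp, cur_state)
        new_states.append(new_state)
    return cur_inp, tuple(new_states)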