def __call__(self, inputs, state, timestep=0, scope=None):
        """Run one step of the layer-normalized LSTM cell.

        Every op is created under the device ``"/gpu:<self._gpu_for_layer>"``.

        Args:
            inputs: input tensor for this step; handed to ``linear`` together
                with the previous hidden part of ``state``.
            state: previous state tensor; split into two pieces along axis 1,
                taken as ``h`` first and ``c`` second.
            timestep: not used by this method.
            scope: optional variable-scope name; the class name is used when
                it is falsy.

        Returns:
            A pair ``(new_h, new_state)`` where ``new_state`` is ``new_h`` and
            ``new_c`` concatenated along axis 1 (hidden part first).
        """
        device_name = "/gpu:" + str(self._gpu_for_layer)
        with tf.device(device_name):
            cell_scope = scope or type(self).__name__  # "BasicLSTMCell"
            with tf.variable_scope(cell_scope):
                h, c = tf.split(1, 2, state)

                # A single projection produces all four gate pre-activations.
                gates = linear([inputs, h], self._num_units * 4, False, 0.0)
                gates = layer_norm(gates, num_variables_in_tensor=4)

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                i, j, f, o = tf.split(1, 4, gates)

                input_contribution = tf.tanh(j)
                if self.use_recurrent_dropout and self.is_training:
                    # Dropout touches only the candidate input, not the
                    # carried cell state.
                    input_contribution = tf.nn.dropout(
                        input_contribution, self.recurrent_dropout_factor)

                kept_memory = c * tf.sigmoid(f + self._forget_bias)
                written_memory = tf.sigmoid(i) * input_contribution
                new_c = kept_memory + written_memory

                with tf.variable_scope('new_h_output'):
                    normalized_c = layer_norm(new_c)
                    new_h = tf.tanh(normalized_c) * tf.sigmoid(o)

            # State layout is deliberately (h, c), matching the split above.
            new_state = tf.concat(1, [new_h, new_c])
            return new_h, new_state
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Refine ``state`` through ``self.num_highway_layers`` highway layers.

        Each layer builds a candidate ``tanh(layer_norm(linear(...)))`` and a
        sigmoid gate, then blends them as
        ``candidate * gate + state * (1 - gate)``. ``inputs`` is always fed to
        layer 0, and to later layers only when
        ``self.use_inputs_on_each_layer`` is truthy.

        Args:
            inputs: input tensor for this step.
            state: previous state tensor.
            timestep: not used by this method.
            scope: not used by this method.

        Returns:
            ``(current_state, current_state)``: the final state is returned
            as both the output and the next state.
        """
        current_state = state
        # ``range`` rather than ``xrange`` so the loop also runs on Python 3.
        for highway_layer in range(self.num_highway_layers):
            feed_inputs = self.use_inputs_on_each_layer or highway_layer == 0
            layer_tag = str(highway_layer)

            with tf.variable_scope('highway_factor_' + layer_tag):
                candidate_args = ([inputs, current_state]
                                  if feed_inputs else [current_state])
                highway_factor = tf.tanh(
                    layer_norm(linear(candidate_args, self._num_units, False)))

            with tf.variable_scope('gate_for_highway_factor_' + layer_tag):
                gate_args = ([inputs, current_state]
                             if feed_inputs else [current_state])
                # -3.0 is passed as the last ``linear`` argument (presumably
                # the initial bias, which would start the gate mostly closed
                # -- confirm against ``linear``).
                gate_for_highway_factor = tf.sigmoid(
                    linear(gate_args, self._num_units, True, -3.0))
                gate_for_hidden_factor = 1.0 - gate_for_highway_factor

            current_state = (highway_factor * gate_for_highway_factor +
                             current_state * gate_for_hidden_factor)

        return current_state, current_state
Example #3
0
    def __call__(self, inputs, state, scope=None):
        """Run one step of the layer-normalized GRU cell.

        Args:
            inputs: input tensor for this step.
            state: previous hidden state tensor.
            scope: optional variable-scope name; the class name is used when
                it is falsy.

        Returns:
            ``(new_h, new_h)``: the new hidden state serves as both the
            output and the next state.
        """
        cell_scope = scope or type(self).__name__  # "GRUCell"
        with tf.variable_scope(cell_scope):
            with tf.variable_scope("Gates"):
                # Reset and update gates share one projection; both the
                # projection and the layer norm are given 1.0 as their bias
                # argument ("start with bias of 1.0 to not reset and not
                # update").
                gate_preact = linear([inputs, state], 2 * self._num_units,
                                     False, 1.0)
                gate_preact = layer_norm(gate_preact,
                                         num_variables_in_tensor=2,
                                         initial_bias_value=1.0)
                gate_values = tf.sigmoid(gate_preact)
                r, u = tf.split(axis=1,
                                num_or_size_splits=2,
                                value=gate_values)

            with tf.variable_scope("Candidate"):
                with tf.variable_scope("reset_portion"):
                    state_term = layer_norm(
                        linear([state], self._num_units, False))
                    reset_portion = r * state_term
                with tf.variable_scope("inputs_portion"):
                    projected_inputs = linear([inputs], self._num_units, False)
                    inputs_portion = layer_norm(projected_inputs)
                c = tf.tanh(reset_portion + inputs_portion)

            carried = u * state
            refreshed = (1 - u) * c
            new_h = carried + refreshed
        return new_h, new_h
  def __call__(self, inputs, state, timestep = 0, scope=None):
      """Run one step of the GRU cell with multiplicative integration.

      Inputs and hidden state are projected and layer-normalized separately,
      then combined with ``multiplicative_integration`` (called with
      ``weights_already_calculated = True``).

      Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: optional variable-scope name; the class name is used when it
          is falsy.

      Returns:
        ``(new_h, new_h)``: the new hidden state serves as both the output
        and the next state.
      """
      gate_width = 2 * self._num_units
      with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"

        with tf.variable_scope("Inputs"):
          inputs_concat = layer_norm(
              linear([inputs], self._num_units*2, False, 1.0),
              num_variables_in_tensor = 2,
              initial_bias_value = 1.0)

        with tf.variable_scope("Hidden_State"):
          hidden_state_concat = layer_norm(
              linear([state], self._num_units*2, False),
              num_variables_in_tensor = 2)

          # The gate combination is built inside the "Hidden_State" scope,
          # exactly where the original code creates it.
          gate_preact = multiplicative_integration(
              [inputs_concat, hidden_state_concat], gate_width, 1.0,
              weights_already_calculated = True)
          r, u = tf.split(1, 2, tf.sigmoid(gate_preact))

        with tf.variable_scope("Candidate"):

          with tf.variable_scope('input_portion'):
            projected_inputs = linear([inputs], self._num_units, False)
            input_portion = layer_norm(projected_inputs)
          with tf.variable_scope('reset_portion'):
            state_term = layer_norm(linear([state], self._num_units, False))
            reset_portion = r * state_term

          candidate_preact = multiplicative_integration(
              [input_portion, reset_portion], self._num_units, 0.0,
              weights_already_calculated = True)
          c = tf.tanh(candidate_preact)

        carried = u * state
        refreshed = (1 - u) * c
        new_h = carried + refreshed

      return new_h, new_h
  def __call__(self, inputs, state, timestep = 0, scope=None):
      """Run one step of the LSTM cell with multiplicative integration.

      Args:
        inputs: input tensor for this step.
        state: previous state tensor; split into two pieces along axis 1,
          taken as ``h`` first and ``c`` second.
        timestep: not used by this method. The original note says the idea
          was to run different batch norm mean and variance stats on
          timesteps greater than 10.
        scope: optional variable-scope name; the class name is used when it
          is falsy.

      Returns:
        ``(new_h, new_state)`` where ``new_state`` is ``new_h`` and ``new_c``
        concatenated along axis 1 (hidden part first).
      """
      gate_width = 4 * self._num_units
      with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        h, c = tf.split(1, 2, state)

        # No bias in the projections: per the original note, the bias is
        # added later by the normalization.
        with tf.variable_scope('inputs_weight_matrix'):
          inputs_concat = layer_norm(
              linear([inputs], gate_width, False),
              num_variables_in_tensor = 4,
              scope = "inputs_concat_layer_norm")

        with tf.variable_scope('state_weight_matrix'):
          h_concat = layer_norm(
              linear([h], gate_width, False),
              num_variables_in_tensor = 4,
              scope = "h_concat_layer_norm")

        gates = multiplicative_integration(
            [inputs_concat, h_concat], 4*self._num_units, 0.0,
            weights_already_calculated = True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(1, 4, gates)

        kept_memory = c * tf.sigmoid(f + self._forget_bias)
        written_memory = tf.sigmoid(i) * tf.tanh(j)
        new_c = kept_memory + written_memory

        # Layer norm is applied to the cell state before the output tanh.
        with tf.variable_scope('layer_norm_hidden_state'):
          normalized_c = layer_norm(new_c)
          new_h = tf.tanh(normalized_c) * tf.sigmoid(o)

      # State layout is (h, c), matching the split above.
      new_state = tf.concat(1, [new_h, new_c])
      return new_h, new_state
  def __call__(self, inputs, state, timestep = 0, scope=None):
      """Compute one GRU step using multiplicative integration.

      The input and hidden-state projections are layer-normalized on their
      own and then merged by ``multiplicative_integration`` with
      ``weights_already_calculated = True``.

      Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: optional variable-scope name; falls back to the class name.

      Returns:
        The tuple ``(new_h, new_h)``; output and next state are the same
        tensor.
      """
      units = self._num_units
      outer_scope = scope or type(self).__name__  # "GRUCell"
      with tf.variable_scope(outer_scope):

        with tf.variable_scope("Inputs"):
          raw_inputs_gates = linear([inputs], units*2, False, 1.0)
          inputs_concat = layer_norm(raw_inputs_gates,
                                     num_variables_in_tensor = 2,
                                     initial_bias_value = 1.0)

        with tf.variable_scope("Hidden_State"):
          raw_state_gates = linear([state], units*2, False)
          hidden_state_concat = layer_norm(raw_state_gates,
                                           num_variables_in_tensor = 2)

          # Built inside "Hidden_State", as in the original graph layout.
          merged_gates = multiplicative_integration(
              [inputs_concat, hidden_state_concat], 2*units, 1.0,
              weights_already_calculated = True)
          r, u = tf.split(1, 2, tf.sigmoid(merged_gates))

        with tf.variable_scope("Candidate"):

          with tf.variable_scope('input_portion'):
            input_portion = layer_norm(linear([inputs], units, False))
          with tf.variable_scope('reset_portion'):
            normalized_state = layer_norm(linear([state], units, False))
            reset_portion = r * normalized_state

          merged_candidate = multiplicative_integration(
              [input_portion, reset_portion], units, 0.0,
              weights_already_calculated = True)
          c = tf.tanh(merged_candidate)

        keep_part = u * state
        update_part = (1 - u) * c
        new_h = keep_part + update_part

      return new_h, new_h
  def __call__(self, inputs, state, timestep = 0, scope=None):
      """Run one step of the LSTM cell with multiplicative integration.

      ``tf.split`` is called with keyword arguments (``value``,
      ``num_or_size_splits``, ``axis``) so that it agrees with the
      ``tf.concat(values, axis)`` form used for the returned state. The
      previous positional ``tf.split(1, n, value)`` form belongs to the
      older argument order and cannot work alongside that ``tf.concat`` call.

      Args:
        inputs: input tensor for this step.
        state: previous state tensor; split into two pieces along axis 1,
          taken as ``h`` first and ``c`` second.
        timestep: not used by this method. The original note says the idea
          was to run different batch norm mean and variance stats on
          timesteps greater than 10.
        scope: optional variable-scope name; the class name is used when it
          is falsy.

      Returns:
        ``(new_h, new_state)`` where ``new_state`` is ``new_h`` and ``new_c``
        concatenated along axis 1 (hidden part first).
      """
      with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        h, c = tf.split(value=state, num_or_size_splits=2, axis=1)

        # No bias in the projections: per the original note, the bias is
        # added later by the normalization.
        with tf.variable_scope('inputs_weight_matrix'):
          inputs_concat = linear([inputs], 4 * self._num_units, False)
          inputs_concat = layer_norm(inputs_concat,
                                     num_variables_in_tensor = 4,
                                     scope = "inputs_concat_layer_norm")

        with tf.variable_scope('state_weight_matrix'):
          h_concat = linear([h], 4 * self._num_units, False)
          h_concat = layer_norm(h_concat,
                                num_variables_in_tensor = 4,
                                scope = "h_concat_layer_norm")

        gates = multiplicative_integration(
            [inputs_concat, h_concat], 4*self._num_units, 0.0,
            weights_already_calculated = True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=gates, num_or_size_splits=4, axis=1)

        new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * tf.tanh(j)

        # Layer norm is applied to the cell state before the output tanh.
        with tf.variable_scope('layer_norm_hidden_state'):
          new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

      # State layout is (h, c), matching the split above.
      return new_h, tf.concat([new_h, new_c], 1)
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Run one step of the LSTM cell with several memory arrays.

        ``state`` is split along axis 1 into ``self.num_memory_arrays + 1``
        pieces: the hidden state first, then one cell state per memory array.
        One large projection yields four gate tensors per array; each array
        updates its own cell state and contributes one term to the hidden
        state, and those terms are summed (not averaged).

        Args:
            inputs: input tensor for this step.
            state: previous state tensor laid out as described above.
            timestep: not used by this method.
            scope: optional variable-scope name; the class name is used when
                it is falsy.

        Returns:
            ``(new_h, new_state)`` where ``new_state`` concatenates ``new_h``
            followed by every new cell state along axis 1.
        """
        with tf.variable_scope(scope
                               or type(self).__name__):  # "BasicLSTMCell"
            state_parts = tf.split(1, self.num_memory_arrays + 1, state)
            h = state_parts[0]
            c_list = state_parts[1:]

            # One very large matrix multiplication for speed; the result is
            # split into per-array gates below.
            gate_width = self._num_units * 4 * self.num_memory_arrays
            if self.use_multiplicative_integration:
                concat = multiplicative_integration([inputs, h], gate_width,
                                                    0.0)
            else:
                concat = linear([inputs, h], gate_width, True)

            if self.use_layer_normalization:
                concat = layer_norm(concat,
                                    num_variables_in_tensor=4 *
                                    self.num_memory_arrays)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            # -- stored in consecutive groups of four, one group per array.
            all_vars_list = tf.split(1, 4 * self.num_memory_arrays, concat)

            new_c_list, new_h_list = [], []
            # ``range`` rather than ``xrange`` so the loop also runs on
            # Python 3.
            for array_counter in range(self.num_memory_arrays):
                base = 4 * array_counter
                i = all_vars_list[base]
                j = all_vars_list[base + 1]
                f = all_vars_list[base + 2]
                o = all_vars_list[base + 3]

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        tf.tanh(j), self.recurrent_dropout_factor)
                else:
                    input_contribution = tf.tanh(j)

                updated_c = (c_list[array_counter] *
                             tf.sigmoid(f + self._forget_bias) +
                             tf.sigmoid(i) * input_contribution)
                # The un-normalized cell state is what gets carried forward.
                new_c_list.append(updated_c)

                if self.use_layer_normalization:
                    new_c = layer_norm(updated_c)
                else:
                    new_c = updated_c

                new_h_list.append(tf.tanh(new_c) * tf.sigmoid(o))

            # Sum all new_h components; note there is no division by
            # num_memory_arrays.
            new_h = tf.add_n(new_h_list)

        # State layout is (h, c_0, ..., c_n), matching the split above.
        return new_h, tf.concat(1, [new_h] + new_c_list)
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Refine ``state`` through highway layers with multiplicative integration.

        For layer 0, and for every layer when
        ``self.use_inputs_on_each_layer`` is truthy, the candidate and gate
        are built with ``multiplicative_integration`` over ``inputs`` and the
        current state. Other layers use ``linear`` on the current state alone
        (with ``layer_norm`` on the candidate). The layer result is
        ``candidate * gate + state * (1 - gate)``.

        Args:
            inputs: input tensor for this step.
            state: previous state tensor.
            timestep: not used by this method.
            scope: not used by this method.

        Returns:
            ``(current_state, current_state)``: the final state is returned
            as both the output and the next state.
        """
        current_state = state
        # ``range`` rather than ``xrange`` so the loop also runs on Python 3.
        for highway_layer in range(self.num_highway_layers):
            feed_inputs = self.use_inputs_on_each_layer or highway_layer == 0
            layer_tag = str(highway_layer)

            with tf.variable_scope('highway_factor_' + layer_tag):
                if feed_inputs:
                    highway_factor = tf.tanh(
                        multiplicative_integration([inputs, current_state],
                                                   self._num_units))
                else:
                    highway_factor = tf.tanh(
                        layer_norm(
                            linear([current_state], self._num_units, True)))

            with tf.variable_scope('gate_for_highway_factor_' + layer_tag):
                if feed_inputs:
                    gate_for_highway_factor = tf.sigmoid(
                        multiplicative_integration([inputs, current_state],
                                                   self._num_units,
                                                   initial_bias_value=-3.0))
                else:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([current_state], self._num_units, True, -3.0))

                gate_for_hidden_factor = 1 - gate_for_highway_factor

                # Dropout is applied to the candidate only, after the gate
                # has been computed.
                if self.use_recurrent_dropout and self.is_training:
                    highway_factor = tf.nn.dropout(
                        highway_factor, self.recurrent_dropout_factor)

            current_state = (highway_factor * gate_for_highway_factor +
                             current_state * gate_for_hidden_factor)

        return current_state, current_state
  def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run one step of the LSTM cell with several memory arrays.

    ``state`` is split along axis 1 into ``self.num_memory_arrays + 1``
    pieces: the hidden state first, then one cell state per memory array.
    One large projection yields four gate tensors per array; each array
    updates its own cell state and contributes one term to the hidden state,
    and those terms are summed.

    Args:
      inputs: input tensor for this step.
      state: previous state tensor laid out as described above.
      timestep: not used by this method.
      scope: optional variable-scope name; the class name is used when it is
        falsy.

    Returns:
      ``(new_h, new_state)`` where ``new_state`` concatenates ``new_h``
      followed by every new cell state along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      hidden_state_plus_c_list = tf.split(1, self.num_memory_arrays + 1, state)

      h = hidden_state_plus_c_list[0]
      c_list = hidden_state_plus_c_list[1:]

      # One very large matrix multiplication for speed; the result is split
      # into per-array gates below.
      gate_width = self._num_units * 4 * self.num_memory_arrays
      if self.use_multiplicative_integration:
        concat = multiplicative_integration([inputs, h], gate_width, 0.0)
      else:
        concat = linear([inputs, h], gate_width, True)

      if self.use_layer_normalization:
        concat = layer_norm(concat, num_variables_in_tensor = 4 * self.num_memory_arrays)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      # -- stored in consecutive groups of four, one group per array.
      all_vars_list = tf.split(1, 4 * self.num_memory_arrays, concat)

      new_c_list, new_h_list = [], []
      # ``range`` rather than ``xrange`` so the loop also runs on Python 3.
      for array_counter in range(self.num_memory_arrays):
        base = 4 * array_counter
        i = all_vars_list[base]
        j = all_vars_list[base + 1]
        f = all_vars_list[base + 2]
        o = all_vars_list[base + 3]

        if self.use_recurrent_dropout and self.is_training:
          input_contribution = tf.nn.dropout(tf.tanh(j), self.recurrent_dropout_factor)
        else:
          input_contribution = tf.tanh(j)

        updated_c = (c_list[array_counter] * tf.sigmoid(f + self._forget_bias) +
                     tf.sigmoid(i) * input_contribution)
        # The un-normalized cell state is what gets carried forward.
        new_c_list.append(updated_c)

        if self.use_layer_normalization:
          new_c = layer_norm(updated_c)
        else:
          new_c = updated_c

        new_h_list.append(tf.tanh(new_c) * tf.sigmoid(o))

      # Sum all new_h components -- a mean could be used instead (left as a
      # later investigation in the original note).
      new_h = tf.add_n(new_h_list)

    # State layout is (h, c_0, ..., c_n), matching the split above.
    return new_h, tf.concat(1, [new_h] + new_c_list)
  def __call__(self, inputs, state, scope=None):
    """Run one step of the layer-normalized GRU cell.

    Args:
      inputs: input tensor for this step.
      state: previous hidden state tensor.
      scope: optional variable-scope name; the class name is used when it is
        falsy.

    Returns:
      ``(new_h, new_h)``: the new hidden state serves as both the output and
      the next state.
    """
    cell_scope = scope or type(self).__name__  # "GRUCell"
    with tf.variable_scope(cell_scope):
      with tf.variable_scope("Gates"):
        # Reset and update gates share one projection; 1.0 is passed as the
        # bias argument to both the projection and the layer norm ("start
        # with bias of 1.0 to not reset and not update").
        gate_preact = linear([inputs, state], 2 * self._num_units, False, 1.0)
        concated_r_u = layer_norm(gate_preact,
                                  num_variables_in_tensor = 2,
                                  initial_bias_value = 1.0)
        gate_values = tf.sigmoid(concated_r_u)
        r, u = tf.split(1, 2, gate_values)

      with tf.variable_scope("Candidate"):
        with tf.variable_scope("reset_portion"):
          state_term = layer_norm(linear([state], self._num_units, False))
          reset_portion = r * state_term
        with tf.variable_scope("inputs_portion"):
          projected_inputs = linear([inputs], self._num_units, False)
          inputs_portion = layer_norm(projected_inputs)
        c = tf.tanh(reset_portion + inputs_portion)

      carried = u * state
      refreshed = (1 - u) * c
      new_h = carried + refreshed
    return new_h, new_h
  def __call__(self, inputs, state, timestep = 0, scope=None):
    """Refine ``state`` through ``self.num_highway_layers`` highway layers.

    Each layer builds a candidate ``tanh(layer_norm(linear(...)))`` and a
    sigmoid gate, then blends them as
    ``candidate * gate + state * (1 - gate)``. ``inputs`` is always fed to
    layer 0, and to later layers only when ``self.use_inputs_on_each_layer``
    is truthy.

    Args:
      inputs: input tensor for this step.
      state: previous state tensor.
      timestep: not used by this method.
      scope: not used by this method.

    Returns:
      ``(current_state, current_state)``: the final state is returned as
      both the output and the next state.
    """
    current_state = state
    # ``range`` rather than ``xrange`` so the loop also runs on Python 3.
    for highway_layer in range(self.num_highway_layers):
      feed_inputs = self.use_inputs_on_each_layer or highway_layer == 0
      layer_tag = str(highway_layer)

      with tf.variable_scope('highway_factor_' + layer_tag):
        candidate_args = [inputs, current_state] if feed_inputs else [current_state]
        highway_factor = tf.tanh(layer_norm(linear(candidate_args, self._num_units, False)))

      with tf.variable_scope('gate_for_highway_factor_' + layer_tag):
        gate_args = [inputs, current_state] if feed_inputs else [current_state]
        # -3.0 is passed as the last ``linear`` argument (presumably the
        # initial bias -- confirm against ``linear``).
        gate_for_highway_factor = tf.sigmoid(linear(gate_args, self._num_units, True, -3.0))
        gate_for_hidden_factor = 1.0 - gate_for_highway_factor

      current_state = (highway_factor * gate_for_highway_factor +
                       current_state * gate_for_hidden_factor)

    return current_state, current_state
  def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run one step of the layer-normalized LSTM cell.

    Every op is created under the device ``"/gpu:<self._gpu_for_layer>"``.

    Args:
      inputs: input tensor for this step; handed to ``linear`` together with
        the previous hidden part of ``state``.
      state: previous state tensor; split into two pieces along axis 1,
        taken as ``h`` first and ``c`` second.
      timestep: not used by this method.
      scope: optional variable-scope name; the class name is used when it is
        falsy.

    Returns:
      ``(new_h, new_state)`` where ``new_state`` is ``new_h`` and ``new_c``
      concatenated along axis 1 (hidden part first).
    """
    device_name = "/gpu:"+str(self._gpu_for_layer)
    with tf.device(device_name):
      cell_scope = scope or type(self).__name__  # "BasicLSTMCell"
      with tf.variable_scope(cell_scope):
        h, c = tf.split(1, 2, state)

        # A single projection produces all four gate pre-activations.
        gates = linear([inputs, h], self._num_units * 4, False, 0.0)
        gates = layer_norm(gates, num_variables_in_tensor = 4)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(1, 4, gates)

        input_contribution = tf.tanh(j)
        if self.use_recurrent_dropout and self.is_training:
          # Dropout touches only the candidate input, not the carried cell
          # state.
          input_contribution = tf.nn.dropout(input_contribution, self.recurrent_dropout_factor)

        kept_memory = c * tf.sigmoid(f + self._forget_bias)
        written_memory = tf.sigmoid(i) * input_contribution
        new_c = kept_memory + written_memory

        with tf.variable_scope('new_h_output'):
          normalized_c = layer_norm(new_c)
          new_h = tf.tanh(normalized_c) * tf.sigmoid(o)

      # State layout is deliberately (h, c), matching the split above.
      new_state = tf.concat(1, [new_h, new_c])
      return new_h, new_state
  def __call__(self, inputs, state, timestep = 0, scope=None):
    """Refine ``state`` through highway layers with multiplicative integration.

    For layer 0, and for every layer when ``self.use_inputs_on_each_layer``
    is truthy, the candidate and gate are built with
    ``multiplicative_integration`` over ``inputs`` and the current state.
    Other layers use ``linear`` on the current state alone (with
    ``layer_norm`` on the candidate). The layer result is
    ``candidate * gate + state * (1 - gate)``.

    Args:
      inputs: input tensor for this step.
      state: previous state tensor.
      timestep: not used by this method.
      scope: not used by this method.

    Returns:
      ``(current_state, current_state)``: the final state is returned as
      both the output and the next state.
    """
    current_state = state
    # ``range`` rather than ``xrange`` so the loop also runs on Python 3.
    for highway_layer in range(self.num_highway_layers):
      feed_inputs = self.use_inputs_on_each_layer or highway_layer == 0
      layer_tag = str(highway_layer)

      with tf.variable_scope('highway_factor_' + layer_tag):
        if feed_inputs:
          highway_factor = tf.tanh(multiplicative_integration([inputs, current_state], self._num_units))
        else:
          highway_factor = tf.tanh(layer_norm(linear([current_state], self._num_units, True)))

      with tf.variable_scope('gate_for_highway_factor_' + layer_tag):
        if feed_inputs:
          gate_for_highway_factor = tf.sigmoid(multiplicative_integration([inputs, current_state], self._num_units, initial_bias_value = -3.0))
        else:
          gate_for_highway_factor = tf.sigmoid(linear([current_state], self._num_units, True, -3.0))

        gate_for_hidden_factor = 1 - gate_for_highway_factor

        # Dropout is applied to the candidate only, after the gate has been
        # computed.
        if self.use_recurrent_dropout and self.is_training:
          highway_factor = tf.nn.dropout(highway_factor, self.recurrent_dropout_factor)

      current_state = (highway_factor * gate_for_highway_factor +
                       current_state * gate_for_hidden_factor)

    return current_state, current_state