def __call__(self, inputs, state, timestep=0, scope=None):
    '''Run one step of a highway-style recurrent cell.

    The state is refined through ``self.num_highway_layers`` stacked layers.
    Each layer computes a tanh candidate and a sigmoid gate and blends them:
    ``new = candidate * gate + old * (1 - gate)``.

    Layer 0 (and every layer when ``self.use_inputs_on_each_layer`` is
    truthy) builds both terms with ``multiplicative_integration`` over
    ``[inputs, state]``; the remaining layers use a biased ``linear``
    projection of the running state alone.

    Args:
        inputs: input for this step.
        state: state carried over from the previous step.
        timestep: accepted for interface compatibility; not used here.
        scope: accepted for interface compatibility; not used here.

    Returns:
        ``(state, state)``: the refined state, as both output and new state.
    '''
    hidden = state
    for layer_idx in range(self.num_highway_layers):
        # Decide once per layer whether the external inputs take part.
        feeds_inputs = self.use_inputs_on_each_layer or layer_idx == 0

        with tf.variable_scope('highway_factor_{}'.format(layer_idx)):
            if feeds_inputs:
                candidate_pre = multiplicative_integration(
                    [inputs, hidden], output_size=self._num_units)
            else:
                candidate_pre = linear(
                    [hidden], output_size=self._num_units, bias=True)
            highway_factor = tf.tanh(candidate_pre)

        with tf.variable_scope(
                'gate_for_highway_factor_{}'.format(layer_idx)):
            # Gate biases start at -3.0, i.e. sigmoid(-3) ~= 0.05, so the
            # cell initially mostly carries the old state through.
            if feeds_inputs:
                gate_pre = multiplicative_integration(
                    [inputs, hidden], self._num_units,
                    initial_bias_value=-3.0)
            else:
                gate_pre = linear(
                    [hidden], output_size=self._num_units,
                    bias=True, bias_start=-3.0)
            gate_for_highway_factor = tf.sigmoid(gate_pre)
            gate_for_hidden_factor = 1 - gate_for_highway_factor

            # Dropout is applied to the candidate only, never to the
            # carried state, and only while training.
            if self.use_recurrent_dropout and self.is_training:
                highway_factor = tf.nn.dropout(
                    highway_factor, keep_prob=self.recurrent_dropout_factor)

        hidden = (highway_factor * gate_for_highway_factor +
                  hidden * gate_for_hidden_factor)

    return hidden, hidden
Exemple #2
0
    def __call__(self, inputs, state, timestep=0, scope=None):
        '''Run one step of a highway cell with layer-normalised deep layers.

        For each of ``self.num_highway_layers`` layers a tanh candidate and a
        sigmoid gate are computed, and the running state becomes
        ``candidate * gate + state * (1 - gate)``.

        Layers that see the inputs (layer 0, or all layers when
        ``self.use_inputs_on_each_layer`` is truthy) use
        ``multiplicative_integration`` over ``[inputs, state]``. Other layers
        use ``linear`` on the state alone, with ``layer_norm`` applied to the
        candidate projection only.

        Args:
            inputs: input for this step.
            state: state carried over from the previous step.
            timestep: unused in this method.
            scope: unused in this method.

        Returns:
            ``(state, state)``: the refined state as output and new state.
        '''
        hidden = state
        for layer_idx in range(self.num_highway_layers):
            sees_inputs = self.use_inputs_on_each_layer or layer_idx == 0

            with tf.variable_scope('highway_factor_{}'.format(layer_idx)):
                if sees_inputs:
                    candidate_pre = multiplicative_integration(
                        [inputs, hidden], self._num_units)
                else:
                    candidate_pre = layer_norm(
                        linear([hidden], self._num_units, True))
                highway_factor = tf.tanh(candidate_pre)

            with tf.variable_scope(
                    'gate_for_highway_factor_{}'.format(layer_idx)):
                # -3.0 is the initial gate bias: sigmoid(-3) ~= 0.05, so the
                # old state dominates at initialisation.
                if sees_inputs:
                    gate_pre = multiplicative_integration(
                        [inputs, hidden], self._num_units,
                        initial_bias_value=-3.0)
                else:
                    gate_pre = linear(
                        [hidden], self._num_units, True, -3.0)
                gate_for_highway_factor = tf.sigmoid(gate_pre)
                gate_for_hidden_factor = 1 - gate_for_highway_factor

                # Only the candidate is dropped out, and only in training.
                if self.use_recurrent_dropout and self.is_training:
                    highway_factor = tf.nn.dropout(
                        highway_factor, self.recurrent_dropout_factor)

            hidden = (highway_factor * gate_for_highway_factor +
                      hidden * gate_for_hidden_factor)

        return hidden, hidden
def multiplicative_integration(list_of_inputs,
                               output_size,
                               initial_bias_value=0.0,
                               weights_already_calculated=False,
                               use_highway_gate=False,
                               use_l2_loss=False,
                               scope=None,
                               timestep=0):
    '''Combine exactly two inputs by multiplicative integration.

    With ``Wx`` and ``Uz`` being the two projected inputs, the result is::

        alpha * Wx * Uz + beta1 * Uz + beta2 * Wx + bias

    where ``alpha``, ``beta1``, ``beta2`` and ``bias`` are trainable vectors
    of length ``output_size``.

    Args:
        list_of_inputs: sequence of exactly two items. They are projected
            with ``linear.linear`` unless ``weights_already_calculated`` is
            set, in which case they are used directly as ``Wx`` and ``Uz``
            (useful for inputs that were already projected and normalised).
        output_size: length of the trainable vectors and size requested from
            the projections.
        initial_bias_value: mean of the initializer for the additive bias.
        weights_already_calculated: skip the projections (see above).
        use_highway_gate: if true, pass the result and the first input
            through ``highway_network.apply_highway_gate``.
        use_l2_loss: forwarded to ``linear.linear``.
        scope: optional variable scope; defaults to
            ``'double_inputs_multiple_integration'``.
        timestep: forwarded to ``linear.linear``.

    Returns:
        The integrated tensor (highway-gated when ``use_highway_gate``).

    Raises:
        ValueError: if ``list_of_inputs`` does not contain exactly two items.
    '''
    # Validate before touching the graph so that a bad call fails fast and
    # leaves no half-built scope behind. The count is formatted into the
    # message; passing it as a second exception argument would render the
    # error as a tuple.
    if len(list_of_inputs) != 2:
        raise ValueError(
            'list of inputs must be 2, you have: {}'.format(
                len(list_of_inputs)))

    with tf.variable_scope(scope or 'double_inputs_multiple_integration'):
        if weights_already_calculated:
            # Caller supplies pre-projected (e.g. normalised) tensors.
            Wx = list_of_inputs[0]
            Uz = list_of_inputs[1]

        else:
            # Bias-free projections: the single additive bias is the
            # 'mulint_original_bias' variable created below.
            with tf.variable_scope('Calculate_Wx_mulint'):
                Wx = linear.linear(list_of_inputs[0],
                                   output_size,
                                   bias=False,
                                   use_l2_loss=use_l2_loss,
                                   timestep=timestep)
            with tf.variable_scope('Calculate_Uz_mulint'):
                Uz = linear.linear(list_of_inputs[1],
                                   output_size,
                                   bias=False,
                                   use_l2_loss=use_l2_loss,
                                   timestep=timestep)

        with tf.variable_scope('multiplicative_integration'):
            alpha = tf.get_variable(
                'mulint_alpha', [output_size],
                initializer=tf.truncated_normal_initializer(mean=1.0,
                                                            stddev=0.1))

            # Both betas live in one variable of length 2 * output_size that
            # is split in half along axis 0.
            beta1, beta2 = tf.split(
                axis=0,
                num_or_size_splits=2,
                value=tf.get_variable(
                    'mulint_params_betas', [output_size * 2],
                    initializer=tf.truncated_normal_initializer(mean=0.5,
                                                                stddev=0.1)))

            original_bias = tf.get_variable(
                'mulint_original_bias', [output_size],
                initializer=tf.truncated_normal_initializer(
                    mean=initial_bias_value, stddev=0.1))

        # Note the pairing: beta1 scales Uz and beta2 scales Wx.
        final_output = alpha * Wx * Uz + beta1 * Uz + beta2 * Wx + original_bias

        if use_highway_gate:
            final_output = highway_network.apply_highway_gate(
                final_output, list_of_inputs[0])
    return final_output
    def _inner_function(self,
                        inputs,
                        past_hidden_state,
                        activation=tf.nn.tanh):
        '''Compute z_t, the second-order inner term of the cell.

        The original author's note attributes this to equation 11 of the
        delta RNN paper. With ``V = linear(past_hidden_state)`` and
        ``W = linear(inputs)`` the result is::

            activation(alpha * V * W + beta_one * V + beta_two * W + bias)

        Side effect: the input projection is stored on
        ``self._W_x_inputs`` so that it can be reused later (the original
        comment names ``_outer_function`` as the consumer).

        Args:
            inputs: input for this step.
            past_hidden_state: hidden state from the previous step.
            activation: callable applied to the summed terms.

        Returns:
            The activated tensor z_t.
        '''

        def per_unit_vector(name, initializer):
            # Trainable float32 vector with one entry per unit.
            return tf.get_variable(name, [self._num_units],
                                   dtype=tf.float32,
                                   initializer=initializer)

        # The projections are created first, in the same order as before, so
        # that variable creation order is unchanged.
        state_projection = linear(past_hidden_state, self._num_units, True)
        self._W_x_inputs = linear(inputs, self._num_units, True)

        # Scaling vectors start at one, the additive bias at zero.
        alpha = per_unit_vector('alpha', tf.ones_initializer())
        beta_one = per_unit_vector('beta_one', tf.ones_initializer())
        beta_two = per_unit_vector('beta_two', tf.ones_initializer())
        z_t_bias = per_unit_vector('z_t_bias', tf.zeros_initializer())

        # Second-order (multiplicative) and first-order (additive) parts.
        multiplicative_part = alpha * state_projection * self._W_x_inputs
        additive_part = (beta_one * state_projection +
                         beta_two * self._W_x_inputs)

        return activation(multiplicative_part + additive_part + z_t_bias)
    def __call__(self, inputs, state, timestep=0, scope=None):
        '''Run one step of a plain (linear-projection) highway cell.

        Each of the ``self.num_highway_layers`` layers computes a tanh
        candidate and a sigmoid gate with ``linear`` and updates the running
        state to ``candidate * gate + state * (1 - gate)``. Layer 0 projects
        ``[inputs, state]``; deeper layers project only the state unless
        ``self.use_inputs_on_each_layer`` is truthy.

        Args:
            inputs: input for this step.
            state: state carried over from the previous step.
            timestep: unused in this method.
            scope: unused in this method.

        Returns:
            ``(state, state)``: the refined state as output and new state.
        '''
        hidden = state
        for depth in range(self.num_highway_layers):
            with_inputs = self.use_inputs_on_each_layer or depth == 0

            with tf.variable_scope('highway_factor_{}'.format(depth)):
                candidate_pre = linear(
                    [inputs, hidden] if with_inputs else [hidden],
                    output_size=self._num_units,
                    bias=True)
                highway_factor = tf.tanh(candidate_pre)

            with tf.variable_scope(
                    'gate_for_highway_factor_{}'.format(depth)):
                # The gate bias starts at -3.0 (sigmoid(-3) ~= 0.05), so the
                # previous state is mostly carried through at first.
                gate_pre = linear(
                    [inputs, hidden] if with_inputs else [hidden],
                    output_size=self._num_units,
                    bias=True,
                    bias_start=-3.0)
                gate_for_highway_factor = tf.sigmoid(gate_pre)
                gate_for_hidden_factor = 1.0 - gate_for_highway_factor

            hidden = (highway_factor * gate_for_highway_factor +
                      hidden * gate_for_hidden_factor)

        return hidden, hidden
    def __call__(self, inputs, state, timestep=0, scope=None):
        '''Run one step of a single-gate recurrent cell.

        A sigmoid gate is computed from ``[inputs, state]`` (through
        ``multiplicative_integration`` when
        ``self.use_multiplicative_integration`` is set, otherwise through a
        biased ``linear``). The candidate is a tanh projection of the inputs
        only. The new state is ``(1 - gate) * state + gate * candidate``.

        Args:
            inputs: input for this step.
            state: state carried over from the previous step.
            timestep: unused in this method.
            scope: optional variable scope; defaults to the class name.

        Returns:
            ``(new_h, new_h)``: the new state as output and new state.
        '''
        with tf.variable_scope(scope or type(self).__name__):
            with tf.variable_scope('Gates'):
                # The gate bias is initialised from
                # self.forget_bias_initialization.
                # TODO (carried over from the original): double check that
                # this initial value is correct.
                if not self.use_multiplicative_integration:
                    gate_logits = linear([inputs, state], self._num_units,
                                         True,
                                         self.forget_bias_initialization)
                else:
                    gate_logits = multiplicative_integration(
                        [inputs, state], self._num_units,
                        self.forget_bias_initialization)

                gate = tf.sigmoid(gate_logits)

            with tf.variable_scope('Candidate'):
                # The candidate depends on the inputs only, not on the state.
                candidate = tf.tanh(
                    linear([inputs], self._num_units, True, 0.0))

                # Dropout touches only the candidate, and only in training.
                input_contribution = candidate
                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        candidate, self.recurrent_dropout_factor)

            retained = (1 - gate) * state
            admitted = gate * input_contribution
            new_h = retained + admitted

        return new_h, new_h
Exemple #7
0
  def __call__(self, inputs, state, scope=None):
    """Run one step of a layer-normalised gated recurrent unit (GRU).

    The reset gate ``r`` and update gate ``u`` come from one projection of
    ``[inputs, state]`` that is layer-normalised, passed through a sigmoid
    and split in two along axis 1. The candidate is
    ``tanh(r * layer_norm(linear(state)) + layer_norm(linear(inputs)))`` and
    the new state is ``u * state + (1 - u) * candidate``.

    Args:
      inputs: input for this step.
      state: state carried over from the previous step.
      scope: optional variable scope; defaults to the class name.

    Returns:
      ``(new_h, new_h)``: the new state as output and new state.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
      with tf.variable_scope("Gates"):
        # Original note: start with a bias of 1.0 to not reset and not
        # update. The 1.0 is also given to layer_norm as its initial bias.
        gate_projection = linear(
            [inputs, state], 2 * self._num_units, False, 1.0)
        concated_r_u = layer_norm(
            gate_projection, num_variables_in_tensor=2, initial_bias_value=1.0)

        gates = tf.sigmoid(concated_r_u)
        r, u = tf.split(value=gates, num_or_size_splits=2, axis=1)

      with tf.variable_scope("Candidate"):
        with tf.variable_scope("reset_portion"):
          normed_state = layer_norm(linear([state], self._num_units, False))
          reset_portion = r * normed_state
        with tf.variable_scope("inputs_portion"):
          inputs_portion = layer_norm(
              linear([inputs], self._num_units, False))
        candidate = tf.tanh(reset_portion + inputs_portion)

      # u close to 1 keeps the old state; u close to 0 takes the candidate.
      kept = u * state
      replaced = (1 - u) * candidate
      new_h = kept + replaced
    return new_h, new_h
Exemple #8
0
  def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run one step of a layer-normalised highway cell.

    For every layer the candidate is ``tanh(layer_norm(linear(x)))`` and the
    gate is ``sigmoid(linear(x))`` with a bias starting at -3.0, where ``x``
    is ``[inputs, state]`` for layer 0 (or for all layers when
    ``self.use_inputs_on_each_layer`` is truthy) and ``[state]`` otherwise.
    The running state becomes ``candidate * gate + state * (1 - gate)``.

    Args:
      inputs: input for this step.
      state: state carried over from the previous step.
      timestep: unused in this method.
      scope: unused in this method.

    Returns:
      ``(state, state)``: the refined state as output and new state.
    """
    hidden = state
    for depth in range(self.num_highway_layers):
      with_inputs = self.use_inputs_on_each_layer or depth == 0

      with tf.variable_scope('highway_factor_'+str(depth)):
        # Bias-free projection; layer_norm follows it.
        candidate_pre = layer_norm(linear(
            [inputs, hidden] if with_inputs else [hidden],
            self._num_units, False))
        highway_factor = tf.tanh(candidate_pre)

      with tf.variable_scope('gate_for_highway_factor_'+str(depth)):
        # The gate is not layer-normalised; its bias starts at -3.0.
        gate_pre = linear(
            [inputs, hidden] if with_inputs else [hidden],
            self._num_units, True, -3.0)
        gate_for_highway_factor = tf.sigmoid(gate_pre)
        gate_for_hidden_factor = 1.0 - gate_for_highway_factor

      hidden = (highway_factor * gate_for_highway_factor +
                hidden * gate_for_hidden_factor)

    return hidden, hidden
Exemple #9
0
    def __call__(self, inputs, state, timestep=0, scope=None):
        '''Run one step of a GRU with multiplicative integration.

        Inputs and state are projected separately (without bias) and
        layer-normalised. ``multiplicative_integration`` then combines the
        two pre-computed projections for the gates and for the candidate
        (``weights_already_calculated=True``).

        The new state is ``u * state + (1 - u) * candidate``.

        Args:
            inputs: input for this step.
            state: state carried over from the previous step.
            timestep: unused in this method.
            scope: optional variable scope; defaults to the class name.

        Returns:
            ``(new_h, new_h)``: the new state as output and new state.
        '''
        with tf.variable_scope(scope or type(self).__name__):  # 'GRUCell'
            gate_width = 2 * self._num_units

            with tf.variable_scope('Inputs'):
                projected_inputs = linear([inputs], gate_width, False, 1.0)
                inputs_concat = layer_norm(
                    projected_inputs,
                    num_variables_in_tensor=2, initial_bias_value=1.0)

            with tf.variable_scope('Hidden_State'):
                projected_state = linear([state], gate_width, False)
                hidden_state_concat = layer_norm(
                    projected_state, num_variables_in_tensor=2)

                # The gates are built inside this scope, as in the original,
                # so their variable names stay the same.
                gate_logits = multiplicative_integration(
                    [inputs_concat, hidden_state_concat],
                    output_size=gate_width, initial_bias_value=1.0,
                    weights_already_calculated=True)
                r, u = tf.split(
                    value=tf.sigmoid(gate_logits),
                    num_or_size_splits=2, axis=1)

            with tf.variable_scope('Candidate'):
                with tf.variable_scope('input_portion'):
                    input_portion = layer_norm(
                        linear([inputs],
                               output_size=self._num_units, bias=False))
                with tf.variable_scope('reset_portion'):
                    normed_state = layer_norm(
                        linear([state], self._num_units, False))
                    reset_portion = r * normed_state

                candidate_logits = multiplicative_integration(
                    [input_portion, reset_portion],
                    output_size=self._num_units, initial_bias_value=0.0,
                    weights_already_calculated=True)
                candidate = tf.tanh(candidate_logits)

            kept = u * state
            replaced = (1 - u) * candidate
            new_h = kept + replaced

        return new_h, new_h
    def __call__(self, inputs, state, scope=None):
        '''Run one step of a JZS-style gated cell on a fixed GPU.

        NOTE(review): the original comments label this both "JZS3, mutant 2"
        and 'JZS1Cell'; confirm which variant is intended. The default
        variable scope is the actual class name.

        Computation, all placed on ``/gpu:<self._gpu_for_layer>``::

            z   = sigmoid(linear([inputs, tanh(state)]))   # bias start 1.0
            r   = sigmoid(linear([inputs, state]))         # bias start 1.0
            h_t = tanh(linear([state * r, inputs])) * z + state * (1 - z)

        Args:
            inputs: input for this step.
            state: state carried over from the previous step.
            scope: optional variable scope; defaults to the class name.

        Returns:
            ``(h_t, h_t)``: there is only one hidden state to keep track of.
        '''
        with tf.device('/gpu:{}'.format(self._gpu_for_layer)):
            with tf.variable_scope(scope or type(self).__name__):
                # Equation 1: update gate, fed with a tanh-squashed state.
                with tf.variable_scope('ZInput'):
                    squashed_state = tf.tanh(state)
                    z_logits = linear(
                        [inputs, squashed_state],
                        self._num_units,
                        True,
                        1.0,
                        weight_initializer=self._weight_initializer,
                        orthogonal_scale_factor=self._orthogonal_scale_factor)
                    z = tf.sigmoid(z_logits)

                # Equation 2: reset gate, fed with the raw state.
                with tf.variable_scope('RInput'):
                    r_logits = linear(
                        [inputs, state],
                        self._num_units,
                        True,
                        1.0,
                        weight_initializer=self._weight_initializer,
                        orthogonal_scale_factor=self._orthogonal_scale_factor)
                    r = tf.sigmoid(r_logits)

                # Equation 3: candidate from the reset state, blended by z.
                with tf.variable_scope('Candidate'):
                    candidate_logits = linear(
                        [state * r, inputs], self._num_units, True)
                    gated_candidate = tf.tanh(candidate_logits) * z
                    carried_state = state * (1 - z)

                h_t = gated_candidate + carried_state

            return h_t, h_t
Exemple #11
0
    def __call__(self, inputs, state, timestep=0, scope=None):
        '''Run one step of an LSTM with multiplicative integration.

        ``state`` is split in two along axis 1 into ``h`` then ``c``.
        Inputs and ``h`` are each projected (without bias) to four gate
        blocks and layer-normalised. ``multiplicative_integration`` combines
        the two projections, and the result is split into ``i, j, f, o``.

        Original author's note: ``timestep`` was intended for running
        different normalisation statistics at later timesteps; it is not
        used in this method.

        Args:
            inputs: input for this step.
            state: previous ``h`` and ``c`` concatenated along axis 1.
            timestep: unused in this method.
            scope: optional variable scope; defaults to the class name.

        Returns:
            ``(new_h, concat([new_h, new_c], axis=1))``.
        '''
        with tf.variable_scope(scope or type(self).__name__):  # 'BasicLSTMCell'
            h, c = tf.split(value=state, num_or_size_splits=2, axis=1)
            gates_width = 4 * self._num_units

            # The projections carry no bias: the additive bias comes from the
            # multiplicative-integration step below.
            with tf.variable_scope('inputs_weight_matrix'):
                projected_inputs = linear(
                    [inputs], output_size=gates_width, bias=False)
                inputs_concat = layer_norm(
                    projected_inputs,
                    num_variables_in_tensor=4,
                    scope='inputs_concat_layer_norm')

            with tf.variable_scope('state_weight_matrix'):
                projected_h = linear([h], gates_width, False)
                h_concat = layer_norm(
                    projected_h,
                    num_variables_in_tensor=4,
                    scope='h_concat_layer_norm')

            integrated = multiplicative_integration(
                [inputs_concat, h_concat],
                output_size=gates_width, initial_bias_value=0.0,
                weights_already_calculated=True)
            # i = input gate, j = new input, f = forget gate, o = output gate
            i, j, f, o = tf.split(
                value=integrated, num_or_size_splits=4, axis=1)

            retained = c * tf.sigmoid(f + self._forget_bias)
            written = tf.sigmoid(i) * tf.tanh(j)
            new_c = retained + written

            # Layer norm is applied to the cell state before the output
            # nonlinearity; new_c itself is stored un-normalised.
            with tf.variable_scope('layer_norm_hidden_state'):
                new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

        # The state is packed as [h, c]; the original marks this order as
        # deliberately reversed.
        return new_h, tf.concat(axis=1, values=[new_h, new_c])
Exemple #12
0
  def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run one step of a layer-normalised LSTM cell.

    ``state`` is split in two along axis 1 into ``h`` then ``c``. One
    bias-free projection of ``[inputs, h]`` yields all four gate blocks,
    which are layer-normalised and split into ``i, j, f, o``.

    Args:
      inputs: input for this step.
      state: previous ``h`` and ``c`` concatenated along axis 1.
      timestep: unused in this method.
      scope: optional variable scope; defaults to the class name.

    Returns:
      ``(new_h, concat([new_h, new_c], axis=1))``.
    """
    # with tf.device("/gpu:"+str(self._gpu_for_layer)):
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      h, c = tf.split(value=state, num_or_size_splits=2, axis=1)

      # All gate parameters live in a single projection.
      gate_projection = linear([inputs, h], self._num_units * 4, False, 0.0)
      concat = layer_norm(gate_projection, num_variables_in_tensor=4)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      i, j, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

      # Recurrent dropout hits only the candidate, and only while training.
      input_contribution = tf.tanh(j)
      if self.use_recurrent_dropout and self.is_training:
        input_contribution = tf.nn.dropout(
            input_contribution, self.recurrent_dropout_factor)

      retained = c * tf.sigmoid(f + self._forget_bias)
      written = tf.sigmoid(i) * input_contribution
      new_c = retained + written

      # The output is computed from a layer-normalised copy of the cell
      # state; new_c itself is stored un-normalised.
      with tf.variable_scope('new_h_output'):
        new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

    # State layout is [h, c]; the original marks this as purposely reversed.
    return new_h, tf.concat(axis=1, values=[new_h, new_c])
    def __call__(self, inputs, state, timestep=0, scope=None):
        '''Run one step of an LSTM with several parallel memory arrays.

        ``state`` is split along axis 1 into ``self.num_memory_arrays + 1``
        pieces: the hidden state ``h`` followed by one cell state per memory
        array. One large projection of ``[inputs, h]`` yields four gate
        blocks (``i, j, f, o``) per array. Each array updates its own cell
        state, and the per-array outputs are summed into the new ``h``.

        Args:
            inputs: input for this step.
            state: previous ``h`` and cell states concatenated on axis 1.
            timestep: unused in this method.
            scope: optional variable scope; defaults to the class name.

        Returns:
            ``(new_h, concat([new_h] + new cell states, axis=1))``.
        '''
        with tf.variable_scope(scope
                               or type(self).__name__):  # 'BasicLSTMCell'
            num_arrays = self.num_memory_arrays
            state_pieces = tf.split(
                state, num_or_size_splits=num_arrays + 1, axis=1)
            h, c_list = state_pieces[0], state_pieces[1:]

            # One very large matrix multiplication for all gates of all
            # arrays; it is split into per-gate blocks further down.
            projection_size = self._num_units * 4 * num_arrays
            if self.use_multiplicative_integration:
                concat = multiplicative_integration(
                    [inputs, h], projection_size, 0.0)
            else:
                concat = linear([inputs, h], projection_size, True)

            if self.use_layer_normalization:
                concat = layer_norm(
                    concat, num_variables_in_tensor=4 * num_arrays)

            # Gate blocks come in consecutive groups of four per array:
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            all_vars_list = tf.split(
                concat, num_or_size_splits=4 * num_arrays, axis=1)

            new_c_list, new_h_list = [], []
            for array_idx in range(num_arrays):
                first = 4 * array_idx
                i, j, f, o = all_vars_list[first:first + 4]

                # Recurrent dropout hits only the candidate, in training.
                input_contribution = tf.tanh(j)
                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        input_contribution, self.recurrent_dropout_factor)

                updated_c = (c_list[array_idx] *
                             tf.sigmoid(f + self._forget_bias) +
                             tf.sigmoid(i) * input_contribution)
                new_c_list.append(updated_c)

                # The stored cell state stays un-normalised; only the copy
                # that feeds the output is layer-normalised.
                if self.use_layer_normalization:
                    c_for_output = layer_norm(updated_c)
                else:
                    c_for_output = updated_c

                new_h_list.append(tf.tanh(c_for_output) * tf.sigmoid(o))

            # Sum of all per-array outputs (a mean was left as a possible
            # alternative by the original author).
            new_h = tf.add_n(new_h_list)

        # State layout is [h, c_0, c_1, ...]; marked as purposely reversed.
        return new_h, tf.concat([new_h] + new_c_list, axis=1)