Example #1
    def __call__(self, inputs, state, timestep=0, scope=None):

        current_state = state
        for highway_layer in xrange(self.num_highway_layers):
            with tf.variable_scope('highway_factor_' + str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    highway_factor = tf.tanh(
                        multiplicative_integration([inputs, current_state],
                                                   self._num_units))
                else:
                    highway_factor = tf.tanh(
                        layer_norm(
                            linear([current_state], self._num_units, True)))

            with tf.variable_scope('gate_for_highway_factor_' +
                                   str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    gate_for_highway_factor = tf.sigmoid(
                        multiplicative_integration([inputs, current_state],
                                                   self._num_units,
                                                   initial_bias_value=-3.0))
                else:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([current_state], self._num_units, True, -3.0))

                gate_for_hidden_factor = 1 - gate_for_highway_factor

                if self.use_recurrent_dropout and self.is_training:
                    highway_factor = tf.nn.dropout(
                        highway_factor, self.recurrent_dropout_factor)

            current_state = highway_factor * gate_for_highway_factor + current_state * gate_for_hidden_factor

        return current_state, current_state
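Examples 1 and 2 both compute the same per-layer highway update: a candidate state H is mixed with the previous state through a sigmoid transform gate T, giving state_new = T * H + (1 - T) * state. A minimal NumPy sketch of that update (the function and weight names are illustrative, not part of the original code) is:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def highway_layer_step(inputs, state, W_h, W_t, b_t=-3.0):
    """One highway-layer step; W_h and W_t are illustrative weight matrices."""
    x = np.concatenate([inputs, state], axis=1)
    highway_factor = np.tanh(x.dot(W_h))                  # candidate state H
    gate = sigmoid(x.dot(W_t) + b_t)                      # transform gate T, biased shut
    return gate * highway_factor + (1.0 - gate) * state   # mix candidate and carry-over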
Example #2
    def __call__(self, inputs, state, timestep=0, scope=None):
        current_state = state
        for highway_layer in xrange(self.num_highway_layers):
            with tf.variable_scope('highway_factor_' + str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    highway_factor = tf.tanh(
                        linear([inputs, current_state], self._num_units, True))
                else:
                    highway_factor = tf.tanh(
                        linear([current_state], self._num_units, True))
            with tf.variable_scope('gate_for_highway_factor_' +
                                   str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([inputs, current_state], self._num_units, True,
                               -3.0))
                else:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([current_state], self._num_units, True, -3.0))

                gate_for_hidden_factor = 1.0 - gate_for_highway_factor

            current_state = highway_factor * gate_for_highway_factor + current_state * gate_for_hidden_factor

        return current_state, current_state
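The `linear` helper used throughout these snippets is not shown. A rough sketch of what such a helper conventionally does (concatenate the argument tensors, multiply by a single weight matrix, and optionally add a bias initialised to the given start value) follows; the variable names are assumptions, and the project's extra keyword arguments (weight_initializer, use_l2_loss, timestep, ...) are omitted.

import tensorflow as tf

def linear_sketch(args, output_size, bias, bias_start=0.0, scope=None):
    """Illustrative stand-in for the `linear` helper (an assumption, not the original)."""
    total_input_size = sum(int(a.get_shape()[1]) for a in args)
    with tf.variable_scope(scope or 'Linear'):
        matrix = tf.get_variable('Matrix', [total_input_size, output_size])
        x = args[0] if len(args) == 1 else tf.concat(1, args)  # old TF argument order, as in the snippets
        result = tf.matmul(x, matrix)
        if bias:
            bias_term = tf.get_variable(
                'Bias', [output_size],
                initializer=tf.constant_initializer(bias_start))
            result = result + bias_term
    return result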
Example #3
    def __call__(self, inputs, state, timestep=0, scope=None):
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            """Long short-term memory cell (LSTM)."""
            with tf.variable_scope(scope
                                   or type(self).__name__):  # "BasicLSTMCell"
                # Parameters of gates are concatenated into one multiply for efficiency.
                h, c = tf.split(1, 2, state)

                concat = multiplicative_integration([inputs, h],
                                                    self._num_units * 4, 0.0)

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                i, j, f, o = tf.split(1, 4, concat)

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        tf.tanh(j), self.recurrent_dropout_factor)
                else:
                    input_contribution = tf.tanh(j)

                new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(
                    i) * input_contribution
                new_h = tf.tanh(new_c) * tf.sigmoid(o)

            return new_h, tf.concat(1, [new_h, new_c])  # purposely reversed: h comes first in the packed state
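`multiplicative_integration` is also not defined in these snippets. The general multiplicative-integration form replaces the additive Wx + Uh with alpha * (Wx * Uh) + beta1 * Wx + beta2 * Uh + b; a hedged sketch for two inputs (the names and initial values are assumptions, and the real helper's extra options such as weights_already_calculated are omitted) is:

import tensorflow as tf

def multiplicative_integration_sketch(x, h, output_size, initial_bias_value=0.0):
    """Illustrative MI block for two inputs (an assumption, not the project's helper)."""
    with tf.variable_scope('multiplicative_integration'):
        Wx = tf.matmul(x, tf.get_variable('W', [int(x.get_shape()[1]), output_size]))
        Uh = tf.matmul(h, tf.get_variable('U', [int(h.get_shape()[1]), output_size]))
        alpha = tf.get_variable('alpha', [output_size],
                                initializer=tf.constant_initializer(1.0))
        beta1 = tf.get_variable('beta1', [output_size],
                                initializer=tf.constant_initializer(0.5))
        beta2 = tf.get_variable('beta2', [output_size],
                                initializer=tf.constant_initializer(0.5))
        bias = tf.get_variable('bias', [output_size],
                               initializer=tf.constant_initializer(initial_bias_value))
        return alpha * Wx * Uh + beta1 * Wx + beta2 * Uh + bias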
Example #4
    def __call__(self, inputs, state, timestep=0, scope=None):
        with tf.variable_scope(scope
                               or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            hidden_state_plus_c_list = tf.split(1, self.num_memory_arrays + 1,
                                                state)

            h = hidden_state_plus_c_list[0]
            c_list = hidden_state_plus_c_list[1:]
            '''very large matrix multiplication to speed up procedure -- will split variables out later'''

            if self.use_multiplicative_integration:
                concat = multiplicative_integration(
                    [inputs, h], self._num_units * 4 * self.num_memory_arrays,
                    0.0)
            else:
                concat = linear([inputs, h],
                                self._num_units * 4 * self.num_memory_arrays,
                                True)

            if self.use_layer_normalization:
                concat = layer_norm(concat,
                                    num_variables_in_tensor=4 *
                                    self.num_memory_arrays)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate -- comes in sets of fours
            all_vars_list = tf.split(1, 4 * self.num_memory_arrays, concat)
            '''memory array loop'''
            new_c_list, new_h_list = [], []
            for array_counter in xrange(self.num_memory_arrays):

                i = all_vars_list[0 + array_counter * 4]
                j = all_vars_list[1 + array_counter * 4]
                f = all_vars_list[2 + array_counter * 4]
                o = all_vars_list[3 + array_counter * 4]

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        tf.tanh(j), self.recurrent_dropout_factor)
                else:
                    input_contribution = tf.tanh(j)

                new_c_list.append(c_list[array_counter] *
                                  tf.sigmoid(f + self._forget_bias) +
                                  tf.sigmoid(i) * input_contribution)

                if self.use_layer_normalization:
                    new_c = layer_norm(new_c_list[-1])
                else:
                    new_c = new_c_list[-1]

                new_h_list.append(tf.tanh(new_c) * tf.sigmoid(o))
            '''sum all new_h components -- could instead do a mean -- but investigate that later'''
            new_h = tf.add_n(new_h_list)

        return new_h, tf.concat(1, [new_h] + new_c_list)  # purposely reversed: h comes first in the packed state
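The memory-array cell above packs h and every memory array c_k into a single state tensor along dimension 1 and splits it back apart on the next call. A short sketch of that packing convention (the shapes are illustrative, using the same old-style tf.split / tf.concat as the snippet):

import tensorflow as tf

batch_size, num_units, num_memory_arrays = 32, 128, 2

# Assumed state layout: [h | c_1 | ... | c_K] concatenated along dimension 1.
h = tf.zeros([batch_size, num_units])
c_list = [tf.zeros([batch_size, num_units]) for _ in range(num_memory_arrays)]

packed_state = tf.concat(1, [h] + c_list)                  # what __call__ returns
pieces = tf.split(1, num_memory_arrays + 1, packed_state)  # what __call__ does on entry
h_again, c_list_again = pieces[0], pieces[1:]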
Example #5
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Normal Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                r, u = tf.split(
                    1, 2,
                    tf.sigmoid(
                        multiplicative_integration([inputs, state],
                                                   self._num_units * 2, 1.0)))

            with tf.variable_scope("Candidate"):
                # a separate scope is needed because a new linear transform is created here
                # note that the nonlinearity is applied right here
                c = tf.tanh(
                    multiplicative_integration([inputs, state],
                                               self._num_units, 0.0))

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        c, self.recurrent_dropout_factor)
                else:
                    input_contribution = c

            new_h = u * state + (1 - u) * input_contribution

        return new_h, new_h
Example #6
    def __call__(self, inputs, state, timestep=0, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            with tf.variable_scope("Gates"):
                # Forget gate bias starts as 1.0 -- TODO: double check if this is correct
                if self.use_multiplicative_integration:
                    gated_factor = multiplicative_integration(
                        [inputs, state], self._num_units,
                        self.forget_bias_initialization)
                else:
                    gated_factor = linear([inputs, state], self._num_units,
                                          True,
                                          self.forget_bias_initialization)

                gated_factor = tf.sigmoid(gated_factor)

            with tf.variable_scope("Candidate"):
                c = tf.tanh(linear([inputs], self._num_units, True, 0.0))

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        c, self.recurrent_dropout_factor)
                else:
                    input_contribution = c

            new_h = (1 - gated_factor) * state + gated_factor * input_contribution

        return new_h, new_h
Example #7
def apply_highway_gate(proposed_output, original_input, bias=-2.0, scope='transform_lin'):
    '''Applies a sigmoid transform gate to a proposed output (highway-style mixing).'''

    transform_gate = tf.sigmoid(
        linear_function(original_input,
                        proposed_output.get_shape()[1],
                        True,
                        bias,
                        scope=scope))
    carry_gate = 1.0 - transform_gate

    output = transform_gate * proposed_output + carry_gate * original_input
    return output
Example #8
    def __call__(self, inputs, state, scope=None):
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            """JZS1, mutant 1 with n units cells."""
            with tf.variable_scope(scope or type(self).__name__):  # "JZS1Cell"
                with tf.variable_scope("Zinput"):  # Update gate; bias starts at 1.0.
                    '''equation 1 z = sigm(WxzXt+Bz), x_t is inputs'''

                    z = tf.sigmoid(
                        linear([inputs],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))

                with tf.variable_scope("Rinput"):
                    '''equation 2 r = sigm(WxrXt+Whrht+Br), h_t is the previous state'''

                    r = tf.sigmoid(
                        linear([inputs, state],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))
                    '''equation 3'''
                with tf.variable_scope("Candidate"):
                    component_0 = linear([r * state], self._num_units, True)
                    component_1 = tf.tanh(tf.tanh(inputs) + component_0)
                    component_2 = component_1 * z
                    component_3 = state * (1 - z)

                h_t = component_2 + component_3

            return h_t, h_t  # there is only one hidden state output to keep track of.
Example #9
def highway(input_,
            output_size,
            num_layers=2,
            bias=-2.0,
            activation=tf.nn.relu,
            scope=None,
            use_batch_timesteps=False,
            use_l2_loss=True,
            timestep=-1):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).

    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y

    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.

    If the bias is initially set to -2, the layer starts out as (nearly) a simple pass-through.

    use_batch_timesteps requires a 3-D input of shape [batch_size x timesteps x input_size]
    and returns a tensor of the same shape.
    """
    if output_size == 'same':
        output_size = input_.get_shape()[-1]

    linear_function = linear.batch_timesteps_linear if use_batch_timesteps else linear.linear

    with tf.variable_scope(scope or 'highway_network'):
        output = input_
        for idx in xrange(num_layers):
            original_input = output

            transform_gate = tf.sigmoid(
                linear_function(original_input,
                                output_size,
                                True,
                                bias,
                                scope='transform_lin_%d' % idx,
                                timestep=timestep))
            proposed_output = activation(
                linear_function(original_input,
                                output_size,
                                True,
                                use_l2_loss=use_l2_loss,
                                scope='proposed_output_lin_%d' % idx,
                                timestep=timestep),
                'activation_output_lin_' + str(idx))

            carry_gate = 1.0 - transform_gate

            output = transform_gate * proposed_output + carry_gate * original_input
    return output
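A hypothetical usage of the highway() function above (the placeholder shape and scope name are illustrative, not from the source): two stacked layers that keep the input width, with the transform-gate bias at -2.0 so the block initially behaves almost like an identity mapping.

import tensorflow as tf

features = tf.placeholder(tf.float32, [None, 256], name='features')
transformed = highway(features,
                      output_size='same',
                      num_layers=2,
                      bias=-2.0,
                      activation=tf.nn.relu,
                      scope='demo_highway')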
Example #10
    def __call__(self, inputs, state, scope=None):
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            """JZS3, mutant 2 with n units cells."""
            with tf.variable_scope(scope or type(self).__name__):  # "JZS1Cell"
                with tf.variable_scope("Zinput"):  # Update gate; bias starts at 1.0.
                    '''equation 1'''

                    z = tf.sigmoid(
                        linear([inputs, tf.tanh(state)],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))
                    '''equation 2'''
                with tf.variable_scope("Rinput"):
                    r = tf.sigmoid(
                        linear([inputs, state],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))
                    '''equation 3'''
                with tf.variable_scope("Candidate"):
                    component_0 = linear([state * r, inputs], self._num_units,
                                         True)

                    component_2 = (tf.tanh(component_0)) * z
                    component_3 = state * (1 - z)

                h_t = component_2 + component_3

            return h_t, h_t  # there is only one hidden state output to keep track of.
Example #11
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Long short-term memory cell (LSTM).
        The idea with iteration would be to run different batch norm mean and variance stats on timestep greater than 10
        """
        with tf.variable_scope(scope
                               or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            h, c = tf.split(1, 2, state)
            '''note that the linear bias is omitted because the layer norm bias is added later'''
            with tf.variable_scope('inputs_weight_matrix'):
                inputs_concat = linear([inputs], 4 * self._num_units, False)

                inputs_concat = layer_norm(inputs_concat,
                                           num_variables_in_tensor=4,
                                           scope="inputs_concat_layer_norm")

            with tf.variable_scope('state_weight_matrix'):
                h_concat = linear([h], 4 * self._num_units, False)
                h_concat = layer_norm(h_concat,
                                      num_variables_in_tensor=4,
                                      scope="h_concat_layer_norm")

            i, j, f, o = tf.split(
                1, 4,
                multiplicative_integration([inputs_concat, h_concat],
                                           4 * self._num_units,
                                           0.0,
                                           weights_already_calculated=True))

            new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(
                i) * tf.tanh(j)
            '''apply layer norm to the hidden state transition'''
            with tf.variable_scope('layer_norm_hidden_state'):
                new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

        return new_h, tf.concat(1, [new_h, new_c])  # purposely reversed: h comes first in the packed state
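`layer_norm` is likewise used throughout without being shown. A minimal sketch of a typical layer-normalization helper (normalize each row across its features, then apply a learned gain and bias) is below; the real helper's `num_variables_in_tensor` handling is omitted and the names are assumptions.

import tensorflow as tf

def layer_norm_sketch(x, initial_bias_value=0.0, epsilon=1e-5, scope='layer_norm'):
    """Illustrative layer normalization over the last dimension (an assumption)."""
    num_units = int(x.get_shape()[1])
    with tf.variable_scope(scope):
        mean, variance = tf.nn.moments(x, [1], keep_dims=True)
        normalized = (x - mean) / tf.sqrt(variance + epsilon)
        gain = tf.get_variable('gain', [num_units],
                               initializer=tf.constant_initializer(1.0))
        bias = tf.get_variable('bias', [num_units],
                               initializer=tf.constant_initializer(initial_bias_value))
        return normalized * gain + bias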
Example #12
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Normal Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"

            with tf.variable_scope("Inputs"):
                inputs_concat = linear([inputs], self._num_units * 2, False,
                                       1.0)

                inputs_concat = layer_norm(inputs_concat,
                                           num_variables_in_tensor=2,
                                           initial_bias_value=1.0)

            with tf.variable_scope("Hidden_State"):
                hidden_state_concat = linear([state], self._num_units * 2,
                                             False)

                hidden_state_concat = layer_norm(hidden_state_concat,
                                                 num_variables_in_tensor=2)

                r, u = tf.split(
                    1, 2,
                    tf.sigmoid(
                        multiplicative_integration(
                            [inputs_concat, hidden_state_concat],
                            2 * self._num_units,
                            1.0,
                            weights_already_calculated=True)))

            with tf.variable_scope("Candidate"):
                with tf.variable_scope('input_portion'):
                    input_portion = layer_norm(
                        linear([inputs], self._num_units, False))
                with tf.variable_scope('reset_portion'):
                    reset_portion = r * layer_norm(
                        linear([state], self._num_units, False))

                c = tf.tanh(
                    multiplicative_integration(
                        [input_portion, reset_portion],
                        self._num_units,
                        0.0,
                        weights_already_calculated=True))

            new_h = u * state + (1 - u) * c

        return new_h, new_h
Example #13
    def __call__(self, inputs, state, scope=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                concated_r_u = layer_norm(linear([inputs, state], 2 * self._num_units, False, 1.0),
                                          num_variables_in_tensor=2, initial_bias_value=1.0)

                r, u = tf.split(1, 2, tf.sigmoid(concated_r_u))

            with tf.variable_scope("Candidate"):
                with tf.variable_scope("reset_portion"):
                    reset_portion = r * layer_norm(linear([state], self._num_units, False))
                with tf.variable_scope("inputs_portion"):
                    inputs_portion = layer_norm(linear([inputs], self._num_units, False))
                c = tf.tanh(reset_portion + inputs_portion)

            new_h = u * state + (1 - u) * c
        return new_h, new_h