Example #1
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Normal Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                r, u = tf.split(
                    1, 2,
                    tf.sigmoid(
                        multiplicative_integration([inputs, state],
                                                   self._num_units * 2, 1.0)))

            with tf.variable_scope(
                    "Candidate"
            ):  # a separate scope is needed because this creates a new linear map
                # notice they have the activation/non-linear step right here!
                c = tf.tanh(
                    multiplicative_integration([inputs, state],
                                               self._num_units, 0.0))

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        c, self.recurrent_dropout_factor)
                else:
                    input_contribution = c

            new_h = u * state + (1 - u) * input_contribution

        return new_h, new_h
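A minimal single-step usage sketch for the cell above (assuming it is the GRUCell_MulInt referenced in Example #22 and that rnn_cell_mulint_modern from the same repo is importable; shapes are illustrative):

import tensorflow as tf
import rnn_cell_mulint_modern  # module name as listed in Example #22 (assumption)

batch_size, input_size, num_units = 32, 128, 256
x = tf.placeholder(tf.float32, [batch_size, input_size])
h = tf.placeholder(tf.float32, [batch_size, num_units])

cell = rnn_cell_mulint_modern.GRUCell_MulInt(num_units)
new_h, _ = cell(x, h)  # a GRU returns the same tensor as output and new state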
Example #2
    def __call__(self, inputs, state, timestep=0, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            with tf.variable_scope(
                    "Gates"
            ):  # Forget Gate bias starts as 1.0 -- TODO: double check if this is correct
                if self.use_multiplicative_integration:
                    gated_factor = multiplicative_integration(
                        [inputs, state], self._num_units,
                        self.forget_bias_initialization)
                else:
                    gated_factor = linear([inputs, state], self._num_units,
                                          True,
                                          self.forget_bias_initialization)

                gated_factor = tf.sigmoid(gated_factor)

            with tf.variable_scope("Candidate"):
                c = tf.tanh(linear([inputs], self._num_units, True, 0.0))

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        c, self.recurrent_dropout_factor)
                else:
                    input_contribution = c

            new_h = (1 -
                     gated_factor) * state + gated_factor * input_contribution

        return new_h, new_h
Example #3
    def __call__(self, inputs, state, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            unitary_hidden_state, secondary_cell_hidden_state = tf.split(
                1, 2, state)

            mat_in = tf.get_variable('mat_in',
                                     [self.input_size, self.state_size * 2])
            mat_out = tf.get_variable('mat_out',
                                      [self.state_size * 2, self.output_size])
            in_proj = tf.matmul(inputs, mat_in)
            # tf.complex takes (real, imag); unpack the two halves of the projection
            in_proj_c = tf.complex(*tf.split(1, 2, in_proj))
            out_state = modReLU(
                in_proj_c + ulinear(unitary_hidden_state, self.state_size),
                tf.get_variable(name='bias',
                                dtype=tf.float32,
                                shape=[self.state_size],
                                initializer=tf.constant_initializer(0.)),
                scope=scope)

        with tf.variable_scope('unitary_output'):
            '''computes data linear, unitary linear and summation -- TODO: should be complex output'''
            unitary_linear_output_real = linear.linear(
                [tf.real(out_state), tf.imag(out_state), inputs],
                self.output_size, True, 0.0)  # output_size argument was missing; self.output_size is an assumption

        with tf.variable_scope('scale_nonlinearity'):
            modulus = tf.complex_abs(unitary_linear_output_real)
            # hidden_bias is assumed to be defined elsewhere in the cell; rescale is not used below
            rescale = tf.maximum(modulus + hidden_bias, 0.) / (modulus + 1e-7)

        # transition to data shortcut connection
        # (out_bias mirrors the 'B_out' variable in the working variant, Example #8)
        out_bias = tf.get_variable(name='out_bias',
                                   dtype=tf.float32,
                                   shape=[self.output_size],
                                   initializer=tf.constant_initializer(0.))
        out_ = tf.matmul(
            tf.concat(1, [tf.real(out_state), tf.imag(out_state)]),
            mat_out) + out_bias

        # hidden state is complex but output is completely real
        return out_, out_state  # complex
Example #4
    def __call__(self, inputs, state, timestep=0, scope=None):
        current_state = state
        for highway_layer in xrange(self.num_highway_layers):
            with tf.variable_scope('highway_factor_' + str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    highway_factor = tf.tanh(
                        linear([inputs, current_state], self._num_units, True))
                else:
                    highway_factor = tf.tanh(
                        linear([current_state], self._num_units, True))
            with tf.variable_scope('gate_for_highway_factor_' +
                                   str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([inputs, current_state], self._num_units, True,
                               -3.0))
                else:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([current_state], self._num_units, True, -3.0))

                gate_for_hidden_factor = 1.0 - gate_for_highway_factor

            current_state = highway_factor * gate_for_highway_factor + current_state * gate_for_hidden_factor

        return current_state, current_state
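As a sketch of what each highway layer above computes (plain NumPy, illustrative shapes; the weight matrices stand in for the linear() calls in the first-layer / use_inputs_on_each_layer case):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def highway_rnn_layer(x, state, W_h, W_t, b_t=-3.0):
    """One highway layer: gate between a tanh candidate and the carried state."""
    xs = np.concatenate([x, state], axis=1)
    highway_factor = np.tanh(xs.dot(W_h))   # candidate update
    gate = sigmoid(xs.dot(W_t) + b_t)       # transform gate, biased closed at init
    return highway_factor * gate + state * (1.0 - gate)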
Example #5
    def __call__(self, inputs, state, timestep=0, scope=None):

        current_state = state
        for highway_layer in xrange(self.num_highway_layers):
            with tf.variable_scope('highway_factor_' + str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    highway_factor = tf.tanh(
                        multiplicative_integration([inputs, current_state],
                                                   self._num_units))
                else:
                    highway_factor = tf.tanh(
                        layer_norm(
                            linear([current_state], self._num_units, True)))

            with tf.variable_scope('gate_for_highway_factor_' +
                                   str(highway_layer)):
                if self.use_inputs_on_each_layer or highway_layer == 0:
                    gate_for_highway_factor = tf.sigmoid(
                        multiplicative_integration([inputs, current_state],
                                                   self._num_units,
                                                   initial_bias_value=-3.0))
                else:
                    gate_for_highway_factor = tf.sigmoid(
                        linear([current_state], self._num_units, True, -3.0))

                gate_for_hidden_factor = 1 - gate_for_highway_factor

                if self.use_recurrent_dropout and self.is_training:
                    highway_factor = tf.nn.dropout(
                        highway_factor, self.recurrent_dropout_factor)

            current_state = highway_factor * gate_for_highway_factor + current_state * gate_for_hidden_factor

        return current_state, current_state
Example #6
def multiplicative_integration(list_of_inputs,
                               output_size,
                               initial_bias_value=0.0,
                               weights_already_calculated=False,
                               use_highway_gate=False,
                               use_l2_loss=False,
                               scope=None,
                               timestep=0):
    '''Expects a list of exactly 2 inputs and performs multiplicative integration.

    weights_already_calculated treats the list of inputs as precomputed Wx and Uz,
    which is useful for batch-normed inputs.
    '''
    with tf.variable_scope(scope or 'double_inputs_multiple_integration'):
        if len(list_of_inputs) != 2:
            raise ValueError('list_of_inputs must have exactly 2 elements, got:',
                             len(list_of_inputs))

        if weights_already_calculated:  # if you already have weights you want to insert from batch norm
            Wx = list_of_inputs[0]
            Uz = list_of_inputs[1]

        else:
            with tf.variable_scope('Calculate_Wx_mulint'):
                Wx = linear.linear(list_of_inputs[0],
                                   output_size,
                                   False,
                                   use_l2_loss=use_l2_loss,
                                   timestep=timestep)
            with tf.variable_scope("Calculate_Uz_mulint"):
                Uz = linear.linear(list_of_inputs[1],
                                   output_size,
                                   False,
                                   use_l2_loss=use_l2_loss,
                                   timestep=timestep)

        with tf.variable_scope("multiplicative_integration"):
            alpha = tf.get_variable(
                'mulint_alpha', [output_size],
                initializer=tf.truncated_normal_initializer(mean=1.0,
                                                            stddev=0.1))

            beta1, beta2 = tf.split(
                0, 2,
                tf.get_variable('mulint_params_betas', [output_size * 2],
                                initializer=tf.truncated_normal_initializer(
                                    mean=0.5, stddev=0.1)))

            original_bias = tf.get_variable(
                'mulint_original_bias', [output_size],
                initializer=tf.truncated_normal_initializer(
                    mean=initial_bias_value, stddev=0.1))

        final_output = alpha * Wx * Uz + beta1 * Uz + beta2 * Wx + original_bias

        if use_highway_gate:
            final_output = highway_network.apply_highway_gate(
                final_output, list_of_inputs[0])
    return final_output
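A minimal usage sketch for the helper above (old-style TF graph API as used throughout these examples; shapes are illustrative, and the linear module it depends on is assumed to be importable):

import tensorflow as tf

batch_size, input_size, num_units = 32, 128, 256
x = tf.placeholder(tf.float32, [batch_size, input_size])   # becomes the Wx branch
h = tf.placeholder(tf.float32, [batch_size, num_units])    # becomes the Uz branch

with tf.variable_scope("mi_demo"):
    # computes alpha * Wx * Uz + beta1 * Uz + beta2 * Wx + bias, as defined above
    pre_activation = multiplicative_integration([x, h], num_units,
                                                initial_bias_value=0.0)
    activation = tf.tanh(pre_activation)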
Example #7
    def __call__(self, inputs, state, timestep=0, scope=None):
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            """Long short-term memory cell (LSTM)."""
            with tf.variable_scope(scope
                                   or type(self).__name__):  # "BasicLSTMCell"
                # Parameters of gates are concatenated into one multiply for efficiency.
                h, c = tf.split(1, 2, state)

                concat = multiplicative_integration([inputs, h],
                                                    self._num_units * 4, 0.0)

                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                i, j, f, o = tf.split(1, 4, concat)

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        tf.tanh(j), self.recurrent_dropout_factor)
                else:
                    input_contribution = tf.tanh(j)

                new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(
                    i) * input_contribution
                new_h = tf.tanh(new_c) * tf.sigmoid(o)

            return new_h, tf.concat(1, [new_h, new_c])  # purposely reversed
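A single-step call of the LSTM cell above (assuming it is the BasicLSTMCell_MulInt referenced in Example #22; note the state packs h and c side by side, matching tf.split(1, 2, state) at the top of __call__):

import tensorflow as tf
import rnn_cell_mulint_modern  # module name as listed in Example #22 (assumption)

batch_size, input_size, num_units = 32, 128, 256
x = tf.placeholder(tf.float32, [batch_size, input_size])
state = tf.placeholder(tf.float32, [batch_size, 2 * num_units])  # [h, c] concatenated

cell = rnn_cell_mulint_modern.BasicLSTMCell_MulInt(num_units)
new_h, new_state = cell(x, state)  # new_state is tf.concat(1, [new_h, new_c])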
Example #8
    def __call__(self, inputs, state, scope=None):
        zero_initer = tf.constant_initializer(0.)
        with tf.variable_scope(scope or type(self).__name__):
            # these two matrix multiplications convert real-valued inputs to the complex
            # state size and back -- this could be further modified for LSTM configurations
            mat_in = tf.get_variable('W_in',
                                     [self.input_size, self.state_size * 2])
            mat_out = tf.get_variable('W_out',
                                      [self.state_size * 2, self.output_size])

            in_proj = tf.matmul(inputs, mat_in)
            in_proj_c = tf.complex(in_proj[:, :self.state_size],
                                   in_proj[:, self.state_size:])
            out_state = modrelu_c(
                in_proj_c + ulinear_c(state, transform=self.transform),
                tf.get_variable(name='B',
                                dtype=tf.float32,
                                shape=[self.state_size],
                                initializer=zero_initer))
            out_bias = tf.get_variable(name='B_out',
                                       dtype=tf.float32,
                                       shape=[self.output_size],
                                       initializer=zero_initer)
            out = tf.matmul(
                tf.concat(1, [tf.real(out_state),
                              tf.imag(out_state)]), mat_out) + out_bias
        return out, out_state
Example #9
def linear(args, output_size, bias, bias_start=0.0, use_l2_loss=False,
           use_weight_normalization=use_weight_normalization_default, scope=None, timestep=-1, weight_initializer=None,
           orthogonal_scale_factor=1.1):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

    Args:
      args: a 2D Tensor or a list of 2D, batch x n, Tensors.
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Linear".

    Returns:
      A 2D Tensor with shape [batch x output_size] equal to
      sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

    Raises:
      ValueError: if some of the arguments has unspecified or wrong shape.
    """
    # assert args  # was causing an error in upgraded tensorflow
    if not isinstance(args, (list, tuple)):
        args = [args]

    if len(args) > 1 and use_weight_normalization:
        raise ValueError(
            'you cannot use weight_normalization with multiple inputs because the euclidean '
            'norm will be incorrect -- consider using multiplicative_integration instead')

    # Calculate the total size of arguments on dimension 1.
    total_arg_size = 0
    shapes = [a.get_shape().as_list() for a in args]
    for shape in shapes:
        if len(shape) != 2:
            raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
        if not shape[1]:
            raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
        else:
            total_arg_size += shape[1]

    if use_l2_loss:
        l_regularizer = tf.contrib.layers.l2_regularizer(1e-5)
    else:
        l_regularizer = None

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [total_arg_size, output_size],
                                 initializer=tf.uniform_unit_scaling_initializer(), regularizer=l_regularizer)
        if use_weight_normalization: matrix = weight_normalization(matrix, timestep=timestep)

        if len(args) == 1:
            res = tf.matmul(args[0], matrix)
        else:
            res = tf.matmul(tf.concat(1, args), matrix)

        if not bias:
            return res
        bias_term = tf.get_variable("Bias", [output_size],
                                    initializer=tf.constant_initializer(bias_start), regularizer=l_regularizer)

    return res + bias_term
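A short usage sketch of linear() itself (illustrative shapes; the old graph-mode API is assumed, matching the rest of these examples):

import tensorflow as tf

batch_size, input_size, num_units = 32, 128, 256
x = tf.placeholder(tf.float32, [batch_size, input_size])
h = tf.placeholder(tf.float32, [batch_size, num_units])

with tf.variable_scope("gate"):
    # concatenates [x, h] on dim 1, multiplies by one [input_size + num_units, num_units]
    # matrix, and adds a bias initialized to 1.0
    gate = tf.sigmoid(linear([x, h], num_units, True, bias_start=1.0))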
Example #10
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Most basic RNN: output = new_state = tanh(W * input + U * state + B)."""
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            with tf.variable_scope(scope
                                   or type(self).__name__):  # "BasicRNNCell"
                output = tf.tanh(
                    multiplicative_integration([inputs, state],
                                               self._num_units))
            return output, output
Example #11
    def __call__(self, inputs, state, timestep=0, scope=None):
        with tf.variable_scope(scope
                               or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            hidden_state_plus_c_list = tf.split(1, self.num_memory_arrays + 1,
                                                state)

            h = hidden_state_plus_c_list[0]
            c_list = hidden_state_plus_c_list[1:]
            '''very large matrix multiplication to speed up procedure -- will split variables out later'''

            if self.use_multiplicative_integration:
                concat = multiplicative_integration(
                    [inputs, h], self._num_units * 4 * self.num_memory_arrays,
                    0.0)
            else:
                concat = linear([inputs, h],
                                self._num_units * 4 * self.num_memory_arrays,
                                True)

            if self.use_layer_normalization:
                concat = layer_norm(concat,
                                    num_variables_in_tensor=4 *
                                    self.num_memory_arrays)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate -- comes in sets of fours
            all_vars_list = tf.split(1, 4 * self.num_memory_arrays, concat)
            '''memory array loop'''
            new_c_list, new_h_list = [], []
            for array_counter in xrange(self.num_memory_arrays):

                i = all_vars_list[0 + array_counter * 4]
                j = all_vars_list[1 + array_counter * 4]
                f = all_vars_list[2 + array_counter * 4]
                o = all_vars_list[3 + array_counter * 4]

                if self.use_recurrent_dropout and self.is_training:
                    input_contribution = tf.nn.dropout(
                        tf.tanh(j), self.recurrent_dropout_factor)
                else:
                    input_contribution = tf.tanh(j)

                new_c_list.append(c_list[array_counter] *
                                  tf.sigmoid(f + self._forget_bias) +
                                  tf.sigmoid(i) * input_contribution)

                if self.use_layer_normalization:
                    new_c = layer_norm(new_c_list[-1])
                else:
                    new_c = new_c_list[-1]

                new_h_list.append(tf.tanh(new_c) * tf.sigmoid(o))
            '''sum all new_h components -- could instead do a mean -- but investigate that later'''
            new_h = tf.add_n(new_h_list)

        return new_h, tf.concat(1, [new_h] + new_c_list)  # purposely reversed
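A single-step call showing how the packed state lines up with the memory arrays (constructor arguments follow the LSTMCell_MemoryArray line commented out in Example #22; everything else is illustrative):

import tensorflow as tf
import rnn_cell_modern  # module name as listed in Example #22 (assumption)

batch_size, input_size, num_units, num_memory_arrays = 32, 128, 256, 2
x = tf.placeholder(tf.float32, [batch_size, input_size])
# state packs [h, c_1, ..., c_k] side by side, matching the tf.split above
state = tf.placeholder(tf.float32,
                       [batch_size, num_units * (num_memory_arrays + 1)])

cell = rnn_cell_modern.LSTMCell_MemoryArray(
    num_units, num_memory_arrays=num_memory_arrays,
    use_multiplicative_integration=True, use_recurrent_dropout=False)
new_h, new_state = cell(x, state)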
Example #12
    def __call__(self, inputs, state, scope=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                concated_r_u = layer_norm(linear([inputs, state], 2 * self._num_units, False, 1.0),
                                          num_variables_in_tensor=2, initial_bias_value=1.0)

                r, u = tf.split(1, 2, tf.sigmoid(concated_r_u))

            with tf.variable_scope("Candidate"):
                with tf.variable_scope("reset_portion"):
                    reset_portion = r * layer_norm(linear([state], self._num_units, False))
                with tf.variable_scope("inputs_portion"):
                    inputs_portion = layer_norm(linear([inputs], self._num_units, False))
                c = tf.tanh(reset_portion + inputs_portion)

            new_h = u * state + (1 - u) * c
        return new_h, new_h
Example #13
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")

    raw_data = reader.ptb_raw_data(FLAGS.data_path)
    train_data, valid_data, test_data, _ = raw_data

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay**max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session,
                                         m,
                                         train_data,
                                         m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" %
                  (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid, valid_data,
                                         tf.no_op())
            print("Epoch: %d Valid Perplexity: %.3f" %
                  (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
        print("Test Perplexity: %.3f" % test_perplexity)
Example #14
def layer_norm(input_tensor,
               num_variables_in_tensor=1,
               initial_bias_value=0.0,
               scope="layer_norm"):
    with tf.variable_scope(scope):
        '''for clarification of shapes:
        input_tensor = [batch_size, num_neurons]
        mean = [batch_size]
        variance = [batch_size]
        alpha = [num_neurons]
        bias = [num_neurons]
        output = [batch_size, num_neurons]
        '''
        input_tensor_shape_list = input_tensor.get_shape().as_list()

        num_neurons = input_tensor_shape_list[1] / num_variables_in_tensor

        alpha = tf.get_variable('layer_norm_alpha',
                                [num_neurons * num_variables_in_tensor],
                                initializer=tf.constant_initializer(1.0))

        bias = tf.get_variable(
            'layer_norm_bias', [num_neurons * num_variables_in_tensor],
            initializer=tf.constant_initializer(initial_bias_value))

        if num_variables_in_tensor == 1:
            input_tensor_list = [input_tensor]
            alpha_list = [alpha]
            bias_list = [bias]

        else:
            input_tensor_list = tf.split(1, num_variables_in_tensor,
                                         input_tensor)
            alpha_list = tf.split(0, num_variables_in_tensor, alpha)
            bias_list = tf.split(0, num_variables_in_tensor, bias)

        list_of_layer_normed_results = []
        for counter in xrange(num_variables_in_tensor):
            mean, variance = moments_for_layer_norm(
                input_tensor_list[counter],
                axes=[1],
                name="moments_loopnum_" + str(counter) +
                scope)  # average across layer

            output = (
                alpha_list[counter] *
                (input_tensor_list[counter] - mean)) / variance + bias_list[counter]

            list_of_layer_normed_results.append(output)

        if num_variables_in_tensor == 1:
            return list_of_layer_normed_results[0]
        else:
            return tf.concat(1, list_of_layer_normed_results)
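A usage sketch of layer_norm() with two variables packed in one tensor, as the GRU and LSTM cells above do (illustrative shapes; moments_for_layer_norm is assumed to be defined alongside this helper):

import tensorflow as tf

batch_size, num_units = 32, 256
# e.g. the concatenated reset/update gate pre-activations of a GRU
concat = tf.placeholder(tf.float32, [batch_size, 2 * num_units])

with tf.variable_scope("ln_demo"):
    # each half is normalized separately across its neurons, then scaled and shifted
    normed = layer_norm(concat, num_variables_in_tensor=2, initial_bias_value=1.0)
    r, u = tf.split(1, 2, tf.sigmoid(normed))  # old-style tf.split(dim, num, value)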
Example #15
def batch_timesteps_linear(input, output_size, bias, bias_start=0.0, use_l2_loss=False,
                           use_weight_normalization=use_weight_normalization_default, scope=None,
                           transpose_input=True, timestep=-1):
    """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
    Args:
      input: a 3D Tensor [timesteps, batch_size, input_size]
             (or [batch_size, timesteps, input_size] when transpose_input is True).
      output_size: int, second dimension of W[i].
      bias: boolean, whether to add a bias term or not.
      bias_start: starting value to initialize the bias; 0 by default.
      scope: VariableScope for the created subgraph; defaults to "Linear".
    Returns:
      A 3D Tensor with the same leading dimensions as the input and last
      dimension output_size.
    Raises:
      ValueError: if some of the arguments has unspecified or wrong shape.
    """
    # Calculate the total size of arguments on dimension 2.
    if transpose_input:
        input = tf.transpose(input, [1, 0, 2])

    shape_list = input.get_shape().as_list()
    if len(shape_list) != 3:
        raise ValueError('input must be a 3D tensor, got shape of rank:',
                         len(shape_list))

    num_timesteps = shape_list[0]
    batch_size = shape_list[1]
    total_arg_size = shape_list[2]

    if use_l2_loss:
        l_regularizer = tf.contrib.layers.l2_regularizer(1e-5)
    else:
        l_regularizer = None

    # Now the computation.
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [total_arg_size, output_size],
                                 initializer=tf.uniform_unit_scaling_initializer(), regularizer=l_regularizer)
        if use_weight_normalization: matrix = weight_normalization(matrix)
        matrix = tf.tile(tf.expand_dims(matrix, 0), [num_timesteps, 1, 1])

        res = tf.batch_matmul(input, matrix)

        if bias:
            bias_term = tf.get_variable(
                "Bias", [output_size],
                initializer=tf.constant_initializer(bias_start))
            res = res + bias_term

    if transpose_input:
        res = tf.transpose(res, [1, 0, 2])

    return res
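A usage sketch for the batched-timestep variant (illustrative shapes; with transpose_input left at its default of True, the input and output are batch-major):

import tensorflow as tf

batch_size, timesteps, input_size, output_size = 32, 20, 128, 256
seq = tf.placeholder(tf.float32, [batch_size, timesteps, input_size])

with tf.variable_scope("seq_projection"):
    # one weight matrix is tiled across timesteps and applied with tf.batch_matmul
    projected = batch_timesteps_linear(seq, output_size, True)
    # projected has shape [batch_size, timesteps, output_size]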
Example #16
    def __call__(self, inputs, state, scope=None):
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            """JZS1, mutant 1 with n units cells."""
            with tf.variable_scope(scope or type(self).__name__):  # "JZS1Cell"
                with tf.variable_scope(
                        "Zinput"):  # Reset gate and update gate.
                    # We start with bias of 1.0 to not reset and not update.
                    '''equation 1 z = sigm(WxzXt+Bz), x_t is inputs'''

                    z = tf.sigmoid(
                        linear([inputs],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))

                with tf.variable_scope("Rinput"):
                    '''equation 2 r = sigm(WxrXt+Whrht+Br), h_t is the previous state'''

                    r = tf.sigmoid(
                        linear([inputs, state],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))
                    '''equation 3'''
                with tf.variable_scope("Candidate"):
                    component_0 = linear([r * state], self._num_units, True)
                    component_1 = tf.tanh(tf.tanh(inputs) + component_0)
                    component_2 = component_1 * z
                    component_3 = state * (1 - z)

                h_t = component_2 + component_3

            return h_t, h_t  # there is only one hidden state output to keep track of.
Example #17
def highway(input_,
            output_size,
            num_layers=2,
            bias=-2.0,
            activation=tf.nn.relu,
            scope=None,
            use_batch_timesteps=False,
            use_l2_loss=True,
            timestep=-1):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).

    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y

    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.

    if you initially set the bias to -2, the transform gate starts mostly closed,
    so the layer initially behaves as a simple pass-through (identity) layer

    use_batch_timesteps requires a 3D input [batch_size x timesteps x input_size]
    and will return a tensor of the same leading dimensions
    """
    if output_size == 'same': output_size = input_.get_shape()[-1]

    linear_function = linear.batch_timesteps_linear if use_batch_timesteps else linear.linear

    with tf.variable_scope(scope or 'highway_network'):
        output = input_
        for idx in xrange(num_layers):
            original_input = output

            transform_gate = tf.sigmoid(
                linear_function(original_input,
                                output_size,
                                True,
                                bias,
                                scope='transform_lin_%d' % idx,
                                timestep=timestep))
            proposed_output = activation(
                linear_function(original_input,
                                output_size,
                                True,
                                use_l2_loss=use_l2_loss,
                                scope='proposed_output_lin_%d' % idx,
                                timestep=timestep),
                'activation_output_lin_' + str(idx))

            carry_gate = 1.0 - transform_gate

            output = transform_gate * proposed_output + carry_gate * original_input
    return output
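A usage sketch for the standalone highway() helper above (illustrative shapes; output_size is passed explicitly rather than as 'same'):

import tensorflow as tf

batch_size, input_size = 32, 128
x = tf.placeholder(tf.float32, [batch_size, input_size])

with tf.variable_scope("feature_highway"):
    # bias=-2.0 starts the transform gate mostly closed, so the block begins
    # close to an identity mapping and learns to open the gate as needed
    y = highway(x, input_size, num_layers=2, bias=-2.0, activation=tf.nn.relu)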
Example #18
    def __call__(self, inputs, state, scope=None):
        with tf.device("/gpu:" + str(self._gpu_for_layer)):
            """JZS3, mutant 2 with n units cells."""
            with tf.variable_scope(scope or type(self).__name__):  # "JZS1Cell"
                with tf.variable_scope(
                        "Zinput"):  # Reset gate and update gate.
                    # We start with bias of 1.0 to not reset and not update.
                    '''equation 1'''

                    z = tf.sigmoid(
                        linear([inputs, tf.tanh(state)],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))
                    '''equation 2'''
                with tf.variable_scope("Rinput"):
                    r = tf.sigmoid(
                        linear([inputs, state],
                               self._num_units,
                               True,
                               1.0,
                               weight_initializer=self._weight_initializer,
                               orthogonal_scale_factor=self.
                               _orthogonal_scale_factor))
                    '''equation 3'''
                with tf.variable_scope("Candidate"):
                    component_0 = linear([state * r, inputs], self._num_units,
                                         True)

                    component_2 = (tf.tanh(component_0)) * z
                    component_3 = state * (1 - z)

                h_t = component_2 + component_3

            return h_t, h_t  # there is only one hidden state output to keep track of.
Example #19
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Long short-term memory cell (LSTM).
        The idea with iteration would be to run different batch norm mean and variance stats on timestep greater than 10
        """
        with tf.variable_scope(scope
                               or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            h, c = tf.split(1, 2, state)
            '''note that bias is set to 0 because the layer norm bias is added later'''
            with tf.variable_scope('inputs_weight_matrix'):
                inputs_concat = linear([inputs], 4 * self._num_units, False)

                inputs_concat = layer_norm(inputs_concat,
                                           num_variables_in_tensor=4,
                                           scope="inputs_concat_layer_norm")

            with tf.variable_scope('state_weight_matrix'):
                h_concat = linear([h], 4 * self._num_units, False)
                h_concat = layer_norm(h_concat,
                                      num_variables_in_tensor=4,
                                      scope="h_concat_layer_norm")

            i, j, f, o = tf.split(
                1, 4,
                multiplicative_integration([inputs_concat, h_concat],
                                           4 * self._num_units,
                                           0.0,
                                           weights_already_calculated=True))

            new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(
                i) * tf.tanh(j)
            '''apply layer norm to the hidden state transition'''
            with tf.variable_scope('layer_norm_hidden_state'):
                new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

        return new_h, tf.concat(1, [new_h, new_c])  # reversed this
Example #20
    def __call__(self, inputs, state, timestep=0, scope=None):
        """Normal Gated recurrent unit (GRU) with nunits cells."""
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"

            with tf.variable_scope("Inputs"):
                inputs_concat = linear([inputs], self._num_units * 2, False,
                                       1.0)

                inputs_concat = layer_norm(inputs_concat,
                                           num_variables_in_tensor=2,
                                           initial_bias_value=1.0)

            with tf.variable_scope("Hidden_State"):
                hidden_state_concat = linear([state], self._num_units * 2,
                                             False)

                hidden_state_concat = layer_norm(hidden_state_concat,
                                                 num_variables_in_tensor=2)

                r, u = tf.split(
                    1, 2,
                    tf.sigmoid(
                        multiplicative_integration(
                            [inputs_concat, hidden_state_concat],
                            2 * self._num_units,
                            1.0,
                            weights_already_calculated=True)))

            with tf.variable_scope("Candidate"):
                with tf.variable_scope('input_portion'):
                    input_portion = layer_norm(
                        linear([inputs], self._num_units, False))
                with tf.variable_scope('reset_portion'):
                    reset_portion = r * layer_norm(
                        linear([state], self._num_units, False))

                c = tf.tanh(
                    multiplicative_integration(
                        [input_portion, reset_portion],
                        self._num_units,
                        0.0,
                        weights_already_calculated=True))

            new_h = u * state + (1 - u) * c

        return new_h, new_h
Example #21
def ulinear_c(vec_in_c, scope=None, transform='fourier'):
    '''
    Multiply complex vector by parameterized unitary matrix.
    Equation: W = D2 R1 IT D1 Perm R0 FT D0
    '''
    if not vec_in_c.dtype.is_complex:
        raise ValueError('Argument vec_in_c must be complex valued.')
    shape = vec_in_c.get_shape().as_list()
    if len(shape) != 2:
        raise ValueError(
            'Argument vec_in_c must be a batch of vectors (2D tensor).')
    if transform == 'fourier':
        fwd_trans = tf.batch_fft
        inv_trans = tf.batch_ifft
    elif transform == 'hadamard':
        fwd_trans = batch_fht
        inv_trans = batch_fht
    in_size = shape[1]
    with tf.variable_scope(scope or 'ULinear') as _s:
        diag = [get_unit_variable_c('diag' + i, _s, [in_size]) for i in '012']
        refl = [
            normalize_c(
                get_variable_c('refl' + i, [in_size],
                               initializer=tf.random_uniform_initializer(
                                   -1., 1.))) for i in '01'
        ]
        perm0 = tf.constant(np.random.permutation(in_size),
                            name='perm0',
                            dtype='int32')
        out = vec_in_c * diag[0]
        out = refl_c(fwd_trans(out), refl[0])
        out = diag[1] * tf.transpose(tf.gather(tf.transpose(out), perm0))
        out = diag[2] * refl_c(inv_trans(out), refl[1])
        if transform == 'fourier':
            return out
        elif transform == 'hadamard':
            return out * (1. / in_size)
Example #22
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=1.0, state_is_tuple=True)
        # rnn_cell = rnn_cell_modern.HighwayRNNCell(size)
        # rnn_cell = rnn_cell_modern.JZS1Cell(size)
        # rnn_cell = rnn_cell_mulint_modern.BasicRNNCell_MulInt(size)
        # rnn_cell = rnn_cell_mulint_modern.GRUCell_MulInt(size)
        # rnn_cell = rnn_cell_mulint_modern.BasicLSTMCell_MulInt(size)
        # rnn_cell = rnn_cell_mulint_modern.HighwayRNNCell_MulInt(size)
        # rnn_cell = rnn_cell_mulint_layernorm_modern.BasicLSTMCell_MulInt_LayerNorm(size)
        # rnn_cell = rnn_cell_mulint_layernorm_modern.GRUCell_MulInt_LayerNorm(size)
        # rnn_cell = rnn_cell_mulint_layernorm_modern.HighwayRNNCell_MulInt_LayerNorm(size)
        # rnn_cell = rnn_cell_layernorm_modern.BasicLSTMCell_LayerNorm(size)
        # rnn_cell = rnn_cell_layernorm_modern.GRUCell_LayerNorm(size)
        # rnn_cell = rnn_cell_layernorm_modern.HighwayRNNCell_LayerNorm(size)
        # rnn_cell = rnn_cell_modern.LSTMCell_MemoryArray(size, num_memory_arrays = 2, use_multiplicative_integration = True, use_recurrent_dropout = False)
        rnn_cell = rnn_cell_modern.MGUCell(size,
                                           use_multiplicative_integration=True,
                                           use_recurrent_dropout=False)

        if is_training and config.keep_prob < 1:
            rnn_cell = tf.nn.rnn_cell.DropoutWrapper(
                rnn_cell, output_keep_prob=config.keep_prob)
        cell = tf.nn.rnn_cell.MultiRNNCell([rnn_cell] * config.num_layers,
                                           state_is_tuple=True)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # from tensorflow.models.rnn import rnn
        # inputs = [tf.squeeze(input_, [1])
        #           for input_ in tf.split(1, num_steps, inputs)]
        # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.transpose(embedding)  # weight tying
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # optimizer = tf.train.GradientDescentOptimizer(self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)

        self._train_op = optimizer.apply_gradients(zip(grads, tvars))