Example 1
def model(input, vocab_size):

    # Build a randomly initialized word-embedding matrix
    # tf.get_variable(name, shape, initializer): name is the variable name, shape its dimensions, initializer its initialization scheme
    embeddings = tf.get_variable("embeddings", [vocab_size, embedding_size],
                                 initializer=tf.truncated_normal_initializer)
    embedded = tf.nn.embedding_lookup(embeddings, input)

    # Convert the data into the input format the LSTM expects (a time-ordered list of steps)
    rnn_input = tf.unstack(embedded,
                           max_document_length,
                           axis=1,
                           name="rnn-input")

    # Define the LSTM
    lstm_cell = BasicLSTMCell(20, forget_bias=1.0)
    rnn_outputs, rnn_states = static_rnn(lstm_cell,
                                         rnn_input,
                                         dtype=tf.float32)

    # predict
    logits = tf.layers.dense(rnn_outputs[-1], num_classes)
    predicted_labels = tf.argmax(logits, axis=1)

    return predicted_labels, [embeddings, embedded, lstm_cell, logits]
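A minimal usage sketch (not part of the original example) showing how model() above might be wired up in TF 1.x. It assumes the module-level names the function relies on (embedding_size, max_document_length, num_classes) and contrib-style imports; the placeholder name and hyperparameter values are illustrative only.

import tensorflow as tf
from tensorflow.contrib.rnn import BasicLSTMCell, static_rnn

# Assumed module-level hyperparameters used inside model()
embedding_size = 50
max_document_length = 100
num_classes = 2

# A batch of token-id sequences, padded to max_document_length
input_ids = tf.placeholder(tf.int32, [None, max_document_length], name="input")
predicted_labels, tensors = model(input_ids, vocab_size=10000)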
Example 2
def biLSTM(x, hidden_size):
    # biLSTM
    # Purpose: add a bidirectional_lstm operation
    # Args:
    #   x: [batch, height, width]   / [batch, step, embedding_size]
    #   hidden_size: number of hidden units in the LSTM
    # Returns:
    #   output: [batch, height, 2*hidden_size]  / [batch, step, 2*hidden_size]

    # input transformation
    input_x = tf.transpose(x, [1, 0, 2])
    # input_x = tf.reshape(input_x, [-1, w])
    # input_x = tf.split(0, h, input_x)
    input_x = tf.unstack(input_x)

    # define the forward and backward lstm cells
    # lstm_fw_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    lstm_cell = rnn.BasicLSTMCell(hidden_size,
                                  forget_bias=1.0,
                                  state_is_tuple=True)  # modified
    # lstm_bw_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    output, states = rnn.static_rnn(lstm_cell, input_x, dtype=tf.float32)

    # output transformation to the original tensor type
    output = tf.stack(output)
    output = tf.transpose(output, [1, 0, 2])
    return output
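As written, the function runs only the single forward cell (the backward cell is commented out), so its per-step output is hidden_size wide rather than the documented 2*hidden_size. A minimal sketch of a truly bidirectional variant, assuming the same tf/rnn imports as the example; the function name is hypothetical.

def biLSTM_bidirectional(x, hidden_size):
    # time-major list of [batch, embedding_size] tensors
    input_x = tf.unstack(tf.transpose(x, [1, 0, 2]))
    lstm_fw_cell = rnn.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    lstm_bw_cell = rnn.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    # each per-step output concatenates the forward and backward outputs: [batch, 2*hidden_size]
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell,
                                                 input_x, dtype=tf.float32)
    # back to [batch, step, 2*hidden_size]
    return tf.transpose(tf.stack(outputs), [1, 0, 2])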
Example 3
def recurrent_neural_network(x):

    # create a layer of rnn_size
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    # The RNN takes a specific input structure (a list of per-step tensors), therefore reshape
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    # x = tf.split(0, n_chunks, x)
    x = tf.split(x, n_chunks, 0)

    # Create a basic LSTM cell with rnn_size hidden units
    # (the LSTM gating controls what the network keeps and forgets across steps)
    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size)
    # Get the per-step outputs of the RNN and its final cell state
    # (static_rnn unrolls the cell over the input list internally)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Multiply the RNN's last output by the layer weights, then add the biases
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    return output
Example 4
def recurrent_neural_network(x):
    # Defining the layers and the output
    # weights: weights applied to each input going into a layer
    # biases: added after the weights, i.e. (input * weights) + biases.
    # Biases make sure a neuron can still fire even if all inputs are 0.
    # rnn_size: number of hidden units in the LSTM cell
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    # check numpy's transpose for details. Basically changes the shape (e.g.
    # from (1, 2, 3) to (2, 1, 3)); debug print to figure it out
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)

    # rnn_cell is defined by tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # output layer has no activation
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    return output
Example 5
def recurrent_neural_network(x):
    #create dictionaries for hidden layer weights and biases
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    #formatting and modifying data
    '''
	e.g., for 5x5 image
	x = np.ones((1, 5, 5)) = np.ones((None, n_chunks, chunk_size))
	x = array([[
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1]
			]])
	After transpose, swap 0th and 1st dimension, so x = np.ones((5, 1, 5)) = np.ones((n_chunks, None, chunk_size))
	x = array([
				[[1, 1, 1, 1, 1]],
				[[1, 1, 1, 1, 1]],
				[[1, 1, 1, 1, 1]],
				[[1, 1, 1, 1, 1]],
				[[1, 1, 1, 1, 1]]
			])
	After reshape, flatten by one dimension
	x = np.ones((5, 5)) = np.ones((n_chunks, chunk_size))

	x = array([
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1],
				[1, 1, 1, 1, 1]
			])

	After split, split into 5 chunks/5 arrays

	x = [
		array([[1, 1, 1, 1, 1]]),
		array([[1, 1, 1, 1, 1]]),
		array([[1, 1, 1, 1, 1]]),
		array([[1, 1, 1, 1, 1]]),
		array([[1, 1, 1, 1, 1]])
	 ]
	'''

    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)

    #create long short-term memory (LSTM) cell
    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    print("OUTPUTS:", outputs[-1])

    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    return output
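A small NumPy sketch (not part of the original example) that mirrors the transpose -> reshape -> split walkthrough in the docstring above, using the same 1 x 5 x 5 toy input; variable names are illustrative.

import numpy as np

n_chunks, chunk_size = 5, 5
x = np.ones((1, n_chunks, chunk_size))   # (batch, n_chunks, chunk_size)
x = np.transpose(x, (1, 0, 2))           # (n_chunks, batch, chunk_size)
x = np.reshape(x, (-1, chunk_size))      # (n_chunks * batch, chunk_size)
chunks = np.split(x, n_chunks, axis=0)   # list of n_chunks arrays, each (batch, chunk_size)

assert len(chunks) == n_chunks
assert chunks[0].shape == (1, chunk_size)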
Example 6
    def lstm_model(self, x, is_training):


        layer = {'weights': tf.Variable(tf.random_normal([self.rnn_size, 4])),
                 'biases': tf.Variable(tf.random_normal([4]))}
        outputs = []
        x = tf.transpose(x, [1, 0, 2, 3])
        x_ = tf.unstack(x)
        lstm_cells = []

        i=0

        for x_entry in x_:
            # x_hold = tf.transpose(x_entry[:,2], [2, 0, 1])
            x_hold = tf.reshape(x_entry[:, :, 2], [-1, self.chunk_size])
            x_hold = tf.split(x_hold, self.n_chunks, 0)

            scope_name = 'lstm_'+self.name+str(i)
            with tf.variable_scope(scope_name):
                lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size, state_is_tuple=True)
                lstm_cells.append(lstm_cell)
                output, states = rnn.static_rnn(lstm_cells[-1], x_hold, dtype=tf.float32)
                # outputs =tf.reshape(outputs,[-1])
                rnn_result = tf.matmul(output[-1], layer['weights']) + layer['biases']
                # rnn_result = tf.layers.dense(output[-1],4)
                # assign the dropout result back so it actually takes effect
                rnn_result = tf.layers.dropout(rnn_result, self.drop_rate,
                                               seed=232, training=is_training)
                rnn_result = tf.nn.tanh(rnn_result)
            i+=1

        return rnn_result
Example 7
def recurrent_neural_network(vector, n_classes, chunk_size, n_chunks):
    """Create the neural network model.

    Args:
        vector: Vector data
        n_classes: Number of classes
        vector_length: Length of vector making up the tensor

    Returns:
        output: Output

    """
    # Initialize key variables
    rnn_size = 128

    layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
             'biases': tf.Variable(tf.random_normal([n_classes]))}

    vector = tf.transpose(vector, [1, 0, 2])
    vector = tf.reshape(vector, [-1, chunk_size])
    vector = tf.split(vector, n_chunks, 0)

    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, vector, dtype=tf.float32)
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    # Return
    return output
Example 8
def reccurent_neural_network(x):
    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    print(x)
    x = tf.transpose(x, [1, 0, 2])
    print(x)
    x = tf.reshape(x, [-1, chunk_size])
    # x = tf.split(0, n_chunks, x)
    x = tf.split(x, n_chunks, 0)
    print(x)

    # lstm_cell = rnn_cell.BasicLSTMCell(rnn_size)
    # outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    # outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    print(x)

    return output
Example 9
  def testDynamicAttentionDecoderStateIsTuple(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        cell_fn = lambda: rnn_cell.MultiRNNCell(  # pylint: disable=g-long-lambda
            cells=[rnn_cell.BasicLSTMCell(2) for _ in range(2)])
        cell = cell_fn()
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        enc_outputs, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in enc_outputs
        ], 1)
        dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.attention_decoder(
            dec_inp, enc_state, attn_states, cell_fn(), output_size=4)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 4), res[0].shape)

        res = sess.run([mem])
        self.assertEqual(2, len(res[0]))
        self.assertEqual((2, 2), res[0][0].c.shape)
        self.assertEqual((2, 2), res[0][0].h.shape)
        self.assertEqual((2, 2), res[0][1].c.shape)
        self.assertEqual((2, 2), res[0][1].h.shape)
Example 10
  def testAttentionDecoder2(self):
    with self.cached_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        cell_fn = lambda: rnn_cell.GRUCell(2)
        cell = cell_fn()
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        enc_outputs, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs
        ], 1)
        dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.attention_decoder(
            dec_inp, enc_state, attn_states, cell_fn(),
            output_size=4, num_heads=2)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 4), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)
Example 11
    def recurrent_neural_network(self, x):
        layer = {
            'weights':
            tf.Variable(tf.random_normal([self.rnn_size, self.n_classes])),
            'biases':
            tf.Variable(tf.random_normal([self.n_classes]))
        }

        #     layer = {'weights':tf.Variable(np.random.normal(size=(rnn_size,n_classes)).astype('float32')),
        #              'biases':tf.Variable(tf.random_normal([n_classes]))}

        x = tf.transpose(x, [1, 0, 2])
        x = tf.reshape(x, [-1, self.chunk_size])
        x = tf.split(x, self.seq_len, 0)
        lstm_cells = []

        for _ in range(self.num_layers):
            cell = tf.contrib.rnn.BasicRNNCell(self.rnn_size)
            if self.attention:
                cell = tf.contrib.rnn.AttentionCellWrapper(cell, self.seq_len)

            cell = tf.contrib.rnn.DropoutWrapper(cell,
                                                 input_keep_prob=1,
                                                 output_keep_prob=1)
            lstm_cells.append(
                tf.contrib.rnn.DropoutWrapper(cell,
                                              input_keep_prob=1.0,
                                              output_keep_prob=1.0))

        multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)

        outputs, states = rnn.static_rnn(multi_cell, x, dtype=tf.float32)
        output = tf.matmul(outputs[-1],
                           layer['weights']) + layer['biases']  # linear output layer
        return output
Example 12
  def benchmarkTfRNNLSTMTraining(self):
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
      num_layers = config["num_layers"]
      num_units = config["num_units"]
      batch_size = config["batch_size"]
      seq_length = config["seq_length"]

      with ops.Graph().as_default(), ops.device("/cpu"):
        inputs = seq_length * [
            array_ops.zeros([batch_size, num_units], dtypes.float32)
        ]
        initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127)

        cell = lambda: rnn_cell.LSTMCell(  # pylint: disable=cell-var-from-loop
            num_units=num_units, initializer=initializer, state_is_tuple=True)
        multi_cell = rnn_cell.MultiRNNCell(
            [cell() for _ in range(num_layers)])
        outputs, final_state = rnn.static_rnn(
            multi_cell, inputs, dtype=dtypes.float32)
        trainable_variables = ops.get_collection(
            ops.GraphKeys.TRAINABLE_VARIABLES)
        gradients = gradients_impl.gradients([outputs, final_state],
                                             trainable_variables)
        training_op = control_flow_ops.group(*gradients)
        self._BenchmarkOp(training_op, "tf_rnn_lstm %s %s" %
                          (config_name, self._GetConfigDesc(config)))
Example 13
  def benchmarkTfRNNLSTMBlockCellTraining(self):
    test_configs = self._GetTestConfig()
    for config_name, config in test_configs.items():
      num_layers = config["num_layers"]
      num_units = config["num_units"]
      batch_size = config["batch_size"]
      seq_length = config["seq_length"]

      with ops.Graph().as_default(), ops.device("/cpu"):
        inputs = seq_length * [
            array_ops.zeros([batch_size, num_units], dtypes.float32)
        ]
        cell = lambda: lstm_ops.LSTMBlockCell(num_units=num_units)  # pylint: disable=cell-var-from-loop

        multi_cell = rnn_cell.MultiRNNCell(
            [cell() for _ in range(num_layers)])
        outputs, final_state = rnn.static_rnn(
            multi_cell, inputs, dtype=dtypes.float32)
        trainable_variables = ops.get_collection(
            ops.GraphKeys.TRAINABLE_VARIABLES)
        gradients = gradients_impl.gradients([outputs, final_state],
                                             trainable_variables)
        training_op = control_flow_ops.group(*gradients)
        self._BenchmarkOp(training_op, "tf_rnn_lstm_block_cell %s %s" %
                          (config_name, self._GetConfigDesc(config)))
Example 14
  def testEmbeddingAttentionDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell_fn = lambda: rnn_cell.GRUCell(2)
        cell = cell_fn()
        enc_outputs, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs
        ], 1)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_attention_decoder(
            dec_inp,
            enc_state,
            attn_states,
            cell_fn(),
            num_symbols=4,
            embedding_size=2,
            output_size=3)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 3), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)
Example 15
  def testEmbeddingRNNDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell_fn = lambda: rnn_cell.BasicLSTMCell(2)
        cell = cell_fn()
        _, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_rnn_decoder(
            dec_inp, enc_state, cell_fn(), num_symbols=4, embedding_size=2)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 2), res[0].shape)

        res = sess.run([mem])
        self.assertEqual(1, len(res))
        self.assertEqual((2, 2), res[0].c.shape)
        self.assertEqual((2, 2), res[0].h.shape)
Example 16
  def testDynamicAttentionDecoderStateIsTuple(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        cell_fn = lambda: rnn_cell.MultiRNNCell(  # pylint: disable=g-long-lambda
            cells=[rnn_cell.BasicLSTMCell(2) for _ in range(2)])
        cell = cell_fn()
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        enc_outputs, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in enc_outputs
        ], 1)
        dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.attention_decoder(
            dec_inp, enc_state, attn_states, cell_fn(), output_size=4)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 4), res[0].shape)

        res = sess.run([mem])
        self.assertEqual(2, len(res[0]))
        self.assertEqual((2, 2), res[0][0].c.shape)
        self.assertEqual((2, 2), res[0][0].h.shape)
        self.assertEqual((2, 2), res[0][1].c.shape)
        self.assertEqual((2, 2), res[0][1].h.shape)
Example 17
    def __call__(self,
                 inputs,
                 initial_state=None,
                 dtype=None,
                 sequence_length=None,
                 scope=None):
        is_list = isinstance(inputs, list)
        if self._use_dynamic_rnn:
            if is_list:
                inputs = array_ops.stack(inputs)
            outputs, state = rnn.dynamic_rnn(self._cell,
                                             inputs,
                                             sequence_length=sequence_length,
                                             initial_state=initial_state,
                                             dtype=dtype,
                                             time_major=True,
                                             scope=scope)
            if is_list:
                # Convert outputs back to list
                outputs = array_ops.unstack(outputs)
        else:  # non-dynamic rnn
            if not is_list:
                inputs = array_ops.unstack(inputs)
            outputs, state = rnn.static_rnn(self._cell,
                                            inputs,
                                            initial_state=initial_state,
                                            dtype=dtype,
                                            sequence_length=sequence_length,
                                            scope=scope)
            if not is_list:
                # Convert outputs back to tensor
                outputs = array_ops.stack(outputs)

        return outputs, state
Example 18
def recurrent_neural_network(vector, n_classes, chunk_size, n_chunks):
    """Create the neural network model.

    Args:
        vector: Vector data
        n_classes: Number of classes
        vector_length: Length of vector making up the tensor

    Returns:
        output: Output

    """
    # Initialize key variables
    rnn_size = 128

    layer = {
        'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
        'biases': tf.Variable(tf.random_normal([n_classes]))
    }

    vector = tf.transpose(vector, [1, 0, 2])
    vector = tf.reshape(vector, [-1, chunk_size])
    vector = tf.split(vector, n_chunks, 0)

    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, vector, dtype=tf.float32)
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']

    # Return
    return output
Example 19
    def build_graph(parameters):
        """Build a simple graph with BasicLSTMCell."""

        num_batchs = parameters["num_batchs"]
        time_step_size = parameters["time_step_size"]
        input_vec_size = parameters["input_vec_size"]
        num_cells = parameters["num_cells"]
        inputs_after_split = []
        for i in xrange(time_step_size):
            one_timestamp_input = tf.compat.v1.placeholder(
                dtype=parameters["dtype"],
                name="split_{}".format(i),
                shape=[num_batchs, input_vec_size])
            inputs_after_split.append(one_timestamp_input)
        # Currently lstm identifier has a few limitations: only supports
        # forget_bias == 0, inner state activation == tanh.
        # TODO(zhixianyan): Add another test with forget_bias == 1.
        # TODO(zhixianyan): Add another test with relu as activation.
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_cells,
                                                 forget_bias=0.0,
                                                 state_is_tuple=True)
        cell_outputs, _ = rnn.static_rnn(lstm_cell,
                                         inputs_after_split,
                                         dtype=tf.float32)
        out = cell_outputs[-1]
        return inputs_after_split, [out]
Example 20
  def testEmbeddingRNNDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell_fn = lambda: rnn_cell.BasicLSTMCell(2)
        cell = cell_fn()
        _, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]
        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_rnn_decoder(
            dec_inp, enc_state, cell_fn(), num_symbols=4, embedding_size=2)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 2), res[0].shape)

        res = sess.run([mem])
        self.assertEqual(1, len(res))
        self.assertEqual((2, 2), res[0].c.shape)
        self.assertEqual((2, 2), res[0].h.shape)
Example 21
def GRURNN(x, weights, biases):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, nInput])
    x = tf.split(x, nSteps, 0)
    lstmCell = rnn_cell.GRUCell(nHidden)
    outputs, states = rnn.static_rnn(lstmCell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example 22
  def testEmbeddingAttentionDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        cell_fn = lambda: rnn_cell.GRUCell(2)
        cell = cell_fn()
        enc_outputs, enc_state = rnn.static_rnn(cell, inp, dtype=dtypes.float32)
        attn_states = array_ops.concat([
            array_ops.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs
        ], 1)
        dec_inp = [
            constant_op.constant(
                i, dtypes.int32, shape=[2]) for i in range(3)
        ]

        # Use a new cell instance since the attention decoder uses a
        # different variable scope.
        dec, mem = seq2seq_lib.embedding_attention_decoder(
            dec_inp,
            enc_state,
            attn_states,
            cell_fn(),
            num_symbols=4,
            embedding_size=2,
            output_size=3)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 3), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)
Example 23
    def recurrent_neural_network_model(self, data, mode):
        lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_chunk_size,
                                           state_is_tuple=True)
        print("Transposing input data to split data")
        print("Input data shape: ", str(data[self.input_feature_tag]))
        x_rnn = tf.transpose(data[self.input_feature_tag], [1, 0, 2])
        print("Transposed data shape: ", str(x_rnn))
        x_rnn = tf.reshape(x_rnn, [-1, self.rnn_chunk_size])
        print("Reshaped data shape: ", str(x_rnn))
        x_rnn = tf.split(x_rnn, self.rnn_chunks, 0)
        print("Split data shape: ", str(x_rnn))
        outputs, states = rnn.static_rnn(lstm_cell, x_rnn, dtype=tf.float32)

        dense = tf.layers.dense(inputs=outputs[-1],
                                units=self.dense_units,
                                activation=self.dense_activation)
        dense_dropout = tf.layers.dropout(
            inputs=dense,
            rate=self.dense_dropout,
            training=mode == tf.estimator.ModeKeys.TRAIN)
        output = tf.layers.dense(inputs=dense_dropout,
                                 units=self.output_classes)
        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            self.prediction_class_key:
            tf.argmax(input=output, axis=1),
            # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            self.prediction_probability_key:
            tf.nn.softmax(output, name=self.prediction_probability_name)
        }

        return output, predictions
Example 24
    def build_graph(parameters):
        """Build a simple graph with BasicLSTMCell."""

        num_batches = parameters["num_batches"]
        time_step_size = parameters["time_step_size"]
        input_vec_size = parameters["input_vec_size"]
        num_cells = parameters["num_cells"]
        inputs_after_split = []
        for i in range(time_step_size):
            one_timestamp_input = tf.placeholder(
                dtype=parameters["dtype"],
                name="split_{}".format(i),
                shape=[num_batches, input_vec_size])
            inputs_after_split.append(one_timestamp_input)
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_cells,
                                                 activation=tf.nn.relu,
                                                 state_is_tuple=True)
        sequence_length = None

        if parameters["use_sequence_length"]:
            # Using a different sequence length for each batch element, like [1, 2, 3, 3, ...].
            sequence_length = [
                min(i + 1, time_step_size) for i in range(num_batches)
            ]
        cell_outputs, _ = rnn.static_rnn(lstm_cell,
                                         inputs_after_split,
                                         dtype=tf.float32,
                                         sequence_length=sequence_length)
        out = cell_outputs[-1]
        return inputs_after_split, [out]
Example 25
    def _build(self, incoming, *args, **kwargs):
        """
        Args:
            incoming: `Tensor`. 3-D Tensor [samples, timesteps, input dim].
        """
        self._declare_dependencies()
        sequence_length = None
        if self.dynamic:
            sequence_length = retrieve_seq_length_op(incoming if isinstance(
                incoming, tf.Tensor) else tf.stack(incoming))

        input_shape = get_shape(incoming)

        inference = incoming
        # If a tensor given, convert it to a per timestep list
        if type(inference) not in [list, np.array]:
            ndim = len(input_shape)
            assert ndim >= 3, 'Input dim should be at least 3.'
            axes = [1, 0] + list(xrange(2, ndim))
            inference = tf.transpose(inference, (axes))
            inference = tf.unstack(value=inference)

        if self.dynamic:
            outputs, state = tf.nn.dynamic_rnn(
                cell=self._cell,
                inputs=inference,
                dtype=tf.float32,
                initial_state=self.initial_state,
                sequence_length=sequence_length,
                scope=self.module_name)
        else:
            outputs, state = rnn.static_rnn(cell=self._cell,
                                            inputs=inference,
                                            dtype=tf.float32,
                                            initial_state=self.initial_state,
                                            sequence_length=sequence_length,
                                            scope=self.module_name)

        for v in [self._cell.w, self._cell.b]:
            if hasattr(v, '__len__'):
                for var in v:
                    track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
            else:
                track(v, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

        track(outputs[-1], tf.GraphKeys.ACTIVATIONS, self.module_name)

        if self.dynamic:
            if self.return_seq:
                o = outputs
            else:
                outputs = tf.transpose(tf.stack(outputs), [1, 0, 2])
                o = advanced_indexing_op(outputs, sequence_length)
        else:
            o = outputs if self.return_seq else outputs[-1]

        track(o, tf.GraphKeys.LAYER_TENSOR, self.module_name)

        return (o, state) if self.return_state else o
Example 26
def recurrent_neural_network(x, weights, biases):
    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, chunk_size)
    x = tf.unstack(x, n_steps, 1)
    lstm_cell = tf.contrib.rnn.core_rnn_cell.BasicLSTMCell(n_hidden,
                                                           forget_bias=1.0)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['w']) + biases['b']
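The single tf.unstack(x, n_steps, 1) call above is equivalent to the transpose/reshape/split pattern used in most of the other examples. A short illustrative check of that equivalence; the shapes and variable names here are assumed for the sketch only.

import tensorflow as tf

batch_size, n_steps, chunk_size = 4, 6, 8
x = tf.zeros([batch_size, n_steps, chunk_size])

# one-call version: list of n_steps tensors of shape (batch_size, chunk_size)
via_unstack = tf.unstack(x, n_steps, 1)

# three-op version used in the other examples
y = tf.transpose(x, [1, 0, 2])
y = tf.reshape(y, [-1, chunk_size])
via_split = tf.split(y, n_steps, 0)

assert len(via_unstack) == len(via_split) == n_steps
assert via_unstack[0].shape.as_list() == [batch_size, chunk_size]
assert via_split[0].shape.as_list() == [batch_size, chunk_size]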
Example 27
def rnn_model(x, weights, biases):
    """RNN (LSTM or GRU) model for image"""
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_steps, 0)
    lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights) + biases
Example 28
    def model(self, data):
        data = tf.transpose(data, (1, 0, 2))
        data = tf.reshape(data, (-1, self.chunk_size))
        data = tf.split(data, self.n_chunks, 0)

        outputs, _ = rnn.static_rnn(self.lstm_cell, data, dtype=tf.float32)

        return tf.add(tf.matmul(outputs[-1], self.layer['weights']),
                      self.layer['biases'])
Example 29
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                beam_search=True,
                                beam_size=10):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = copy.deepcopy(cell)
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = core_rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        return embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous,
            initial_state_attention=initial_state_attention,
            beam_search=beam_search,
            beam_size=beam_size)
Example 30
def lstm(x):
    n_classes = 10
    rnn_size = 4
    layer = {'weights':tf.Variable(tf.random_normal([rnn_size,n_classes])),
             'biases':tf.Variable(tf.random_normal([n_classes]))}

    x = tf.expand_dims(x, 2)
    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell,  tf.unstack(tf.transpose(x, perm=[1, 0, 2])), dtype=tf.float32)
    output = tf.matmul(outputs[-1],layer['weights']) + layer['biases']
    return output
Example 31
def recurrent_neural_network(x):
    x = tf.transpose(x, [1,0,2])
    x = tf.reshape(x, [-1, chunk_size])
    x = tf.split(x, n_chunks, 0)
    l1 = tf.add(tf.matmul(x,hidden_1_layer['weight']), hidden_1_layer['bias'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1,hidden_2_layer['weight']), hidden_2_layer['bias'])
    l2 = tf.nn.tanh(l2)#relu(l2)
    lstm_cell = rnn.BasicLSTMCell(rnn_size)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    output = tf.matmul(outputs[-1], output_layer['weight']) + output_layer['bias']
    return output
Example 32
 def rnn_encoder(self,
                 encoder_inputs,
                 cell,
                 dtype=dtypes.float32,
                 scope=None):
     with tf.variable_scope(scope or "basic_rnn_seq2seq"):
         enc_cell = copy.deepcopy(cell)
         enc_output, enc_state = rnn.static_rnn(enc_cell,
                                                encoder_inputs,
                                                dtype=dtype)
         return enc_output, enc_state
Example 33
    def build_lstm_graph(self):
        """
        Build the lstm graph without the input data
        :return: the graph
        """
        tf.reset_default_graph()
        lstm_graph = tf.Graph()

        with lstm_graph.as_default():
            self.xx = tf.placeholder('float32', [None, 1, self.n_features],
                                     name='features')
            self.yy = tf.placeholder('float32', name='labels')
            self.bins = tf.constant(self.bins, name='bins')
            with tf.name_scope("output_layer"):
                weight = tf.Variable(tf.random_normal(
                    [self._lstm_size, self.n_labels]),
                                     name='weights')
                biases = tf.Variable(tf.random_normal([self.n_labels]),
                                     name='biases')
                x = tf.transpose(self.xx, [1, 0, 2])
                x = tf.reshape(x, [-1, self.n_features])
                x = tf.split(x, 1)

                lstm_cell = rnn_cell.LSTMCell(self._lstm_size,
                                              name='basic_lstm_cell')
                outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

                logits = tf.add(tf.matmul(outputs[-1], weight),
                                biases,
                                name='rnn_model')

                tf.summary.histogram("last_lstm_output", outputs[-1])
                tf.summary.histogram("weights", weight)
                tf.summary.histogram("biases", biases)

            with tf.name_scope("train"):
                correct = tf.equal(tf.argmax(logits, 1), tf.argmax(self.yy, 1))
                accuracy = tf.reduce_mean(tf.cast(correct, 'float'),
                                          name='accuracy')
                loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                               labels=self.yy),
                    name='loss')
                tf.train.AdamOptimizer().minimize(
                    loss, name="loss_mse_adam_minimize")
                tf.summary.scalar("loss", loss)
                tf.summary.scalar("accuracy", accuracy)

            # Operators to use after restoring the model
            for op in [logits, loss]:
                tf.add_to_collection('ops_to_restore', op)

        return lstm_graph
Example 34
def recurrent_neural_network(data):
    layer = {'weights': tf.Variable(tf.random_normal([rnn_size, n_classes])),
             'biases': tf.Variable(tf.random_normal([n_classes]))}
    data = tf.transpose(data, [1, 0, 2])
    data = tf.reshape(data, [-1, chunk_size])
    data = tf.split(data, n_chunks, 0)

    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    outputs, states = rnn.static_rnn(lstm_cell, data, dtype=tf.float32)
    # (input_data * weights) + biases 
    output = tf.matmul(outputs[-1], layer['weights']) + layer['biases']
    return output
Example 35
    def _build(self, incoming, *args, **kwargs):
        """
        Args:
            incoming: `Tensor`. 3-D Tensor [samples, timesteps, input dim].
        """
        self._declare_dependencies()
        sequence_length = kwargs.get('sequence_length')
        if self.dynamic and sequence_length is None:
            sequence_length = retrieve_seq_length_op(
                incoming if isinstance(incoming, tf.Tensor) else tf.stack(incoming))

        input_shape = get_shape(incoming)

        inference = incoming
        # If a static rnn and tensor given, convert it to a per timestep list
        if type(inference) not in [list, np.array] and not self.dynamic:
            ndim = len(input_shape)
            assert ndim >= 3, 'Input dim should be at least 3.'
            axes = [1, 0] + list(xrange(2, ndim))
            inference = tf.transpose(inference, axes)
            inference = tf.unstack(value=inference)

        if self.dynamic:
            outputs, state = tf.nn.dynamic_rnn(
                cell=self._cell, inputs=inference, dtype=tf.float32,
                initial_state=self.initial_state, sequence_length=sequence_length,
                scope=self.module_name)
        else:
            outputs, state = rnn.static_rnn(
                cell=self._cell, inputs=inference, dtype=tf.float32,
                initial_state=self.initial_state, sequence_length=sequence_length,
                scope=self.module_name)

        for v in [self._cell.w, self._cell.b]:
            if hasattr(v, '__len__'):
                for var in v:
                    track(var, tf.GraphKeys.LAYER_VARIABLES, self.module_name)
            else:
                track(v, tf.GraphKeys.LAYER_VARIABLES, self.module_name)

        track(outputs[-1], tf.GraphKeys.ACTIVATIONS, self.module_name)

        if self.dynamic:
            if self.return_seq:
                o = outputs
            else:
                o = get_sequence_relevant_output(outputs, sequence_length)
        else:
            o = outputs if self.return_seq else outputs[-1]

        track(o, tf.GraphKeys.LAYER_TENSOR, self.module_name)

        return (o, state) if self.return_state else o
Example 36
def neural_network(x):
	layer = {'weights' : tf.Variable(tf.random_normal([rnn_size, n_classes])), 'biases' : tf.Variable(tf.random_normal([n_classes]))}

	x = tf.transpose(x, [1,0,2])
	x = tf.reshape(x, [-1, chunk_size])
	x = tf.split(x, n_chunks)
	
	lstm = rnn_cell.BasicLSTMCell(rnn_size)
	outputs, states = rnn.static_rnn(lstm, x, dtype = tf.float32)

	output = tf.add(tf.matmul(outputs[-1], layer['weights']), layer['biases'], name = 'output')

	return output
Example 37
  def testRNNDecoder(self):
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        inp = [constant_op.constant(0.5, shape=[2, 2])] * 2
        _, enc_state = rnn.static_rnn(
            rnn_cell.GRUCell(2), inp, dtype=dtypes.float32)
        dec_inp = [constant_op.constant(0.4, shape=[2, 2])] * 3
        cell = core_rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
        dec, mem = seq2seq_lib.rnn_decoder(dec_inp, enc_state, cell)
        sess.run([variables.global_variables_initializer()])
        res = sess.run(dec)
        self.assertEqual(3, len(res))
        self.assertEqual((2, 4), res[0].shape)

        res = sess.run([mem])
        self.assertEqual((2, 2), res[0].shape)
Example 38
  def _build_lstm_model(self, number_of_layers):
    batch_size = 8
    dim = 10
    inputs = variables.Variable(random_ops.random_normal([batch_size, dim]))

    def lstm_cell():
      return rnn_cells.MaskedBasicLSTMCell(
          dim, forget_bias=0.0, state_is_tuple=True, reuse=False)

    cell = tf_rnn_cells.MultiRNNCell(
        [lstm_cell() for _ in range(number_of_layers)], state_is_tuple=True)

    outputs = rnn.static_rnn(
        cell, [inputs],
        initial_state=cell.zero_state(batch_size, dtypes.float32))

    return outputs
Example 39
def _half_seq_len_vs_unroll_half_rnn_benchmark(inputs_list_t, sequence_length):
  (_, input_size) = inputs_list_t[0].get_shape().as_list()
  initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127)
  cell = rnn_cell_impl.LSTMCell(
      num_units=input_size,
      use_peepholes=True,
      initializer=initializer,
      state_is_tuple=False)
  outputs, final_state = rnn.static_rnn(
      cell,
      inputs_list_t,
      sequence_length=sequence_length,
      dtype=dtypes.float32)

  trainable_variables = ops_lib.get_collection(
      ops_lib.GraphKeys.TRAINABLE_VARIABLES)
  gradients = gradients_impl.gradients(outputs + [final_state],
                                       trainable_variables)

  return control_flow_ops.group(final_state, *(gradients + outputs))
Example 40
  def testGrid3LSTMCellReLUWithRNN(self):
    batch_size = 3
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    with variable_scope.variable_scope(
        'root', initializer=init_ops.constant_initializer(0.5)):
      cell = grid_rnn_cell.Grid3LSTMCell(
          num_units=num_units, non_recurrent_fn=nn_ops.relu)

      inputs = max_length * [
          array_ops.placeholder(dtypes.float32, shape=(batch_size, input_size))
      ]

      outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))
    self.assertEqual(state[0].c.get_shape(), (batch_size, 2))
    self.assertEqual(state[0].h.get_shape(), (batch_size, 2))
    self.assertEqual(state[1].c.get_shape(), (batch_size, 2))
    self.assertEqual(state[1].h.get_shape(), (batch_size, 2))

    for out, inp in zip(outputs, inputs):
      self.assertEqual(len(out), 1)
      self.assertEqual(out[0].get_shape()[0], inp.get_shape()[0])
      self.assertEqual(out[0].get_shape()[1], num_units)
      self.assertEqual(out[0].dtype, inp.dtype)

    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())

      input_value = np.ones((batch_size, input_size))
      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
      for tp in values[:-1]:
        for v in tp:
          self.assertTrue(np.all(np.isfinite(v)))
      for tp in values[-1]:
        for st in tp:
          for v in st:
            self.assertTrue(np.all(np.isfinite(v)))
Example 41
  def testGrid1LSTMCellWithRNN(self):
    batch_size = 3
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    with variable_scope.variable_scope(
        'root', initializer=init_ops.constant_initializer(0.5)):
      cell = grid_rnn_cell.Grid1LSTMCell(num_units=num_units)

      # for 1-LSTM, we only feed the first step
      inputs = ([
          array_ops.placeholder(
              dtypes.float32, shape=(batch_size, input_size))
      ] + (max_length - 1) * [array_ops.zeros([batch_size, input_size])])

      outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))
    self.assertEqual(state[0].c.get_shape(), (batch_size, 2))
    self.assertEqual(state[0].h.get_shape(), (batch_size, 2))

    for out, inp in zip(outputs, inputs):
      self.assertEqual(len(out), 1)
      self.assertEqual(out[0].get_shape(), (3, num_units))
      self.assertEqual(out[0].dtype, inp.dtype)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())

      input_value = np.ones((batch_size, input_size))
      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
      for tp in values[:-1]:
        for v in tp:
          self.assertTrue(np.all(np.isfinite(v)))
      for tp in values[-1]:
        for st in tp:
          for v in st:
            self.assertTrue(np.all(np.isfinite(v)))
Example 42
  def testCompatibleNames(self):
    with self.test_session(use_gpu=True, graph=ops.Graph()):
      cell = rnn_cell.LSTMCell(10)
      pcell = rnn_cell.LSTMCell(10, use_peepholes=True)
      inputs = [array_ops.zeros([4, 5])] * 6
      rnn.static_rnn(cell, inputs, dtype=dtypes.float32, scope="basic")
      rnn.static_rnn(pcell, inputs, dtype=dtypes.float32, scope="peephole")
      basic_names = {
          v.name: v.get_shape()
          for v in variables.trainable_variables()
      }

    with self.test_session(use_gpu=True, graph=ops.Graph()):
      cell = lstm_ops.LSTMBlockCell(10)
      pcell = lstm_ops.LSTMBlockCell(10, use_peephole=True)
      inputs = [array_ops.zeros([4, 5])] * 6
      rnn.static_rnn(cell, inputs, dtype=dtypes.float32, scope="basic")
      rnn.static_rnn(pcell, inputs, dtype=dtypes.float32, scope="peephole")
      block_names = {
          v.name: v.get_shape()
          for v in variables.trainable_variables()
      }

    with self.test_session(use_gpu=True, graph=ops.Graph()):
      cell = lstm_ops.LSTMBlockFusedCell(10)
      pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
      inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6)
      cell(inputs, dtype=dtypes.float32, scope="basic/lstm_cell")
      pcell(inputs, dtype=dtypes.float32, scope="peephole/lstm_cell")
      fused_names = {
          v.name: v.get_shape()
          for v in variables.trainable_variables()
      }

    self.assertEqual(basic_names, block_names)
    self.assertEqual(basic_names, fused_names)
Example 43
  def testStaticRNNWithKerasSimpleRNNCell(self):
    with self.cached_session() as sess:
      input_shape = 10
      output_shape = 5
      timestep = 4
      batch = 100
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      x_train = np.transpose(x_train, (1, 0, 2))
      y_train = keras.utils.to_categorical(y_train)
      cell = keras.layers.SimpleRNNCell(output_shape)

      inputs = [array_ops.placeholder(
          dtypes.float32, shape=(None, input_shape))] * timestep
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape))

      outputs, state = rnn.static_rnn(
          cell, inputs, dtype=dtypes.float32)
      self.assertEqual(len(outputs), timestep)
      self.assertEqual(outputs[0].shape.as_list(), [None, output_shape])
      self.assertEqual(state.shape.as_list(), [None, output_shape])
      loss = losses.softmax_cross_entropy(predict, state)
      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)

      sess.run([variables_lib.global_variables_initializer()])
      feed_dict = {i: d for i, d in zip(inputs, x_train)}
      feed_dict[predict] = y_train
      _, outputs, state = sess.run(
          [train_op, outputs, state], feed_dict)

      self.assertEqual(len(outputs), timestep)
      self.assertEqual(len(outputs[0]), batch)
      self.assertEqual(len(state), batch)
Example 44
  def testGrid2LSTMCellWithRNNAndDynamicBatchSize(self):
    """Test for #4296."""
    input_size = 5
    max_length = 6  # unrolled up to this length
    num_units = 2

    with variable_scope.variable_scope(
        'root', initializer=init_ops.constant_initializer(0.5)):
      cell = grid_rnn_cell.Grid2LSTMCell(num_units=num_units)

      inputs = max_length * [
          array_ops.placeholder(dtypes.float32, shape=(None, input_size))
      ]

      outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

    self.assertEqual(len(outputs), len(inputs))

    for out, inp in zip(outputs, inputs):
      self.assertEqual(len(out), 1)
      self.assertTrue(out[0].get_shape()[0].value is None)
      self.assertEqual(out[0].get_shape()[1], num_units)
      self.assertEqual(out[0].dtype, inp.dtype)

    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())

      input_value = np.ones((3, input_size))
      values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value})
      for tp in values[:-1]:
        for v in tp:
          self.assertTrue(np.all(np.isfinite(v)))
      for tp in values[-1]:
        for st in tp:
          for v in st:
            self.assertTrue(np.all(np.isfinite(v)))
Example 45
  def __call__(self,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    is_list = isinstance(inputs, list)
    if self._use_dynamic_rnn:
      if is_list:
        inputs = array_ops.stack(inputs)
      outputs, state = rnn.dynamic_rnn(
          self._cell,
          inputs,
          sequence_length=sequence_length,
          initial_state=initial_state,
          dtype=dtype,
          time_major=True,
          scope=scope)
      if is_list:
        # Convert outputs back to list
        outputs = array_ops.unstack(outputs)
    else:  # non-dynamic rnn
      if not is_list:
        inputs = array_ops.unstack(inputs)
      outputs, state = rnn.static_rnn(
          self._cell,
          inputs,
          initial_state=initial_state,
          dtype=dtype,
          sequence_length=sequence_length,
          scope=scope)
      if not is_list:
        # Convert outputs back to tensor
        outputs = array_ops.stack(outputs)

    return outputs, state
Example 46
  def testLSTMBasicToBlockPeeping(self):
    with self.test_session(use_gpu=True) as sess:
      batch_size = 2
      input_size = 3
      cell_size = 4
      sequence_length = 5

      inputs = []
      for _ in range(sequence_length):
        inp = ops.convert_to_tensor(
            np.random.randn(batch_size, input_size), dtype=dtypes.float32)
        inputs.append(inp)

      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890212)
      with variable_scope.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.LSTMCell(
            cell_size, use_peepholes=True, state_is_tuple=True)
        outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32)

        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_state = sess.run([outputs, state[0]])
        basic_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        basic_wgrads = sess.run(
            gradients_impl.gradients(outputs, variables.trainable_variables()))

      with variable_scope.variable_scope("block", initializer=initializer):
        w = variable_scope.get_variable(
            "w",
            shape=[input_size + cell_size, cell_size * 4],
            dtype=dtypes.float32)
        b = variable_scope.get_variable(
            "b",
            shape=[cell_size * 4],
            dtype=dtypes.float32,
            initializer=init_ops.zeros_initializer())

        wci = variable_scope.get_variable(
            "wci", shape=[cell_size], dtype=dtypes.float32)
        wcf = variable_scope.get_variable(
            "wcf", shape=[cell_size], dtype=dtypes.float32)
        wco = variable_scope.get_variable(
            "wco", shape=[cell_size], dtype=dtypes.float32)

        _, _, _, _, _, _, outputs = block_lstm(
            ops.convert_to_tensor(
                sequence_length, dtype=dtypes.int64),
            inputs,
            w,
            b,
            wci=wci,
            wcf=wcf,
            wco=wco,
            cell_clip=0,
            use_peephole=True)

        sess.run([variables.global_variables_initializer()])
        block_outputs = sess.run(outputs)
        block_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        block_wgrads = sess.run(
            gradients_impl.gradients(outputs, [w, b, wci, wcf, wco]))

      self.assertAllClose(basic_outputs, block_outputs)
      self.assertAllClose(basic_grads, block_grads)
      for basic, block in zip(basic_wgrads, block_wgrads):
        self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)

      with variable_scope.variable_scope("fused", initializer=initializer):
        cell = lstm_ops.LSTMBlockFusedCell(
            cell_size, cell_clip=0, use_peephole=True)
        outputs, state = cell(inputs, dtype=dtypes.float32)

        sess.run([variables.global_variables_initializer()])
        fused_outputs, fused_state = sess.run([outputs, state[0]])
        fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        fused_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused/")
        ]
        fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

      self.assertAllClose(basic_outputs, fused_outputs)
      self.assertAllClose(basic_state, fused_state)
      self.assertAllClose(basic_grads, fused_grads)
      for basic, fused in zip(basic_wgrads, fused_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
Example 47
def blocks_match(sess, use_peephole):
  batch_size = 2
  input_size = 3
  cell_size = 4
  sequence_length = 4

  inputs = []
  for _ in range(sequence_length):
    inp = ops.convert_to_tensor(
        np.random.randn(batch_size, input_size), dtype=dtypes.float32)
    inputs.append(inp)
  stacked_inputs = array_ops.stack(inputs)

  initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=19890212)

  with variable_scope.variable_scope("test", initializer=initializer):
    # magic naming so that the cells pick up these variables and reuse them
    if use_peephole:
      wci = variable_scope.get_variable(
          "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtypes.float32)
      wcf = variable_scope.get_variable(
          "rnn/lstm_cell/w_f_diag", shape=[cell_size], dtype=dtypes.float32)
      wco = variable_scope.get_variable(
          "rnn/lstm_cell/w_o_diag", shape=[cell_size], dtype=dtypes.float32)

    w = variable_scope.get_variable(
        "rnn/lstm_cell/kernel",
        shape=[input_size + cell_size, cell_size * 4],
        dtype=dtypes.float32)
    b = variable_scope.get_variable(
        "rnn/lstm_cell/bias",
        shape=[cell_size * 4],
        dtype=dtypes.float32,
        initializer=init_ops.zeros_initializer())

    # Reference implementation: the canonical LSTMCell, reusing the variables
    # defined above, unrolled one step at a time with static_rnn.
    basic_cell = rnn_cell.LSTMCell(
        cell_size, use_peepholes=use_peephole, state_is_tuple=True, reuse=True)
    basic_outputs_op, basic_state_op = rnn.static_rnn(
        basic_cell, inputs, dtype=dtypes.float32)

    # block_lstm runs the whole sequence in a single fused op; only the last
    # of its seven return values (the hidden states) is needed here.
    if use_peephole:
      _, _, _, _, _, _, block_outputs_op = block_lstm(
          ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
          inputs,
          w,
          b,
          wci=wci,
          wcf=wcf,
          wco=wco,
          cell_clip=0,
          use_peephole=True)
    else:
      _, _, _, _, _, _, block_outputs_op = block_lstm(
          ops.convert_to_tensor(sequence_length, dtype=dtypes.int64),
          inputs,
          w,
          b,
          cell_clip=0)

    # Fully fused cell: consumes the stacked (time-major) inputs in one call
    # and picks up the same variables through its scope name.
    fused_cell = lstm_ops.LSTMBlockFusedCell(
        cell_size, cell_clip=0, use_peephole=use_peephole, reuse=True,
        name="rnn/lstm_cell")
    fused_outputs_op, fused_state_op = fused_cell(
        stacked_inputs, dtype=dtypes.float32)

    sess.run([variables.global_variables_initializer()])
    basic_outputs, basic_state = sess.run([basic_outputs_op, basic_state_op[0]])
    basic_grads = sess.run(gradients_impl.gradients(basic_outputs_op, inputs))
    xs = [w, b]
    if use_peephole:
      xs += [wci, wcf, wco]
    basic_wgrads = sess.run(gradients_impl.gradients(basic_outputs_op, xs))

    block_outputs = sess.run(block_outputs_op)
    block_grads = sess.run(gradients_impl.gradients(block_outputs_op, inputs))
    block_wgrads = sess.run(gradients_impl.gradients(block_outputs_op, xs))

    fused_outputs, fused_state = sess.run([fused_outputs_op, fused_state_op[0]])
    fused_grads = sess.run(gradients_impl.gradients(fused_outputs_op, inputs))
    fused_wgrads = sess.run(gradients_impl.gradients(fused_outputs_op, xs))

    return (basic_state, fused_state, basic_outputs, block_outputs,
            fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads,
            block_wgrads, fused_wgrads)
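As a usage note, blocks_match is written to be driven from inside a tf.test.TestCase session, and the three implementations it returns are expected to agree. The sketch below shows how such a test might call it; the method name and the use of cached_session are assumptions modeled on the other tests in this listing, not part of the original example.

  def testAllImplementationsMatch(self):  # hypothetical test name
    with self.cached_session(use_gpu=True) as sess:
      (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs,
       basic_grads, block_grads, fused_grads, basic_wgrads, block_wgrads,
       fused_wgrads) = blocks_match(sess, use_peephole=True)

      # Forward results from all three implementations should agree.
      self.assertAllClose(basic_outputs, block_outputs)
      self.assertAllClose(basic_outputs, fused_outputs)
      self.assertAllClose(basic_state, fused_state)

      # So should the gradients w.r.t. the inputs and the weights.
      self.assertAllClose(basic_grads, block_grads)
      self.assertAllClose(basic_grads, fused_grads)
      for basic, block, fused in zip(basic_wgrads, block_wgrads, fused_wgrads):
        self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)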
Example No. 48
0
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs an attention decoder, initialized with the last
  encoder state, on the embedded decoder_inputs, attending to the encoder
  outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, which can be
  large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell used to encode encoder_inputs.
    dec_cell: tf.nn.rnn_cell.RNNCell used to decode, attending to the encoder
      outputs.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x dec_cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder: wrap the encoder cell so that integer ids are embedded before
    # being fed to the RNN.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        enc_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, encoder_cell.output_size])
        for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      dec_cell = core_rnn_cell.OutputProjectionWrapper(
          dec_cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return seq2seq.embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          dec_cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = seq2seq.embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            dec_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
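For orientation, here is a minimal usage sketch of embedding_attention_seq2seq under TensorFlow 1.x. The vocabulary sizes, step counts, and GRU cell sizes below are illustrative assumptions; the function only requires that encoder_inputs and decoder_inputs be lists of 1D int32 tensors of shape [batch_size].

import tensorflow as tf

num_encoder_steps = 10
num_decoder_steps = 12
source_vocab_size = 1000  # assumed vocabulary sizes
target_vocab_size = 1200
embedding_dim = 64

# One int32 placeholder per time step, as expected by static_rnn-style models.
encoder_inputs = [
    tf.placeholder(tf.int32, shape=[None], name="enc_%d" % t)
    for t in range(num_encoder_steps)
]
decoder_inputs = [
    tf.placeholder(tf.int32, shape=[None], name="dec_%d" % t)
    for t in range(num_decoder_steps)
]

enc_cell = tf.nn.rnn_cell.GRUCell(128)
dec_cell = tf.nn.rnn_cell.GRUCell(128)

outputs, state = embedding_attention_seq2seq(
    encoder_inputs,
    decoder_inputs,
    enc_cell,
    dec_cell,
    num_encoder_symbols=source_vocab_size,
    num_decoder_symbols=target_vocab_size,
    embedding_size=embedding_dim,
    feed_previous=False)
# outputs is a list of num_decoder_steps tensors, each of shape
# [batch_size, target_vocab_size]; state is the final decoder state.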
Example No. 49
0
  def testBasicRNNFusedWrapper(self):
    """This test checks that using a wrapper for BasicRNN works as expected."""

    with self.cached_session() as sess:
      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890212)
      cell = rnn_cell.BasicRNNCell(10)
      batch_size = 5
      input_size = 20
      timelen = 15
      inputs = constant_op.constant(
          np.random.randn(timelen, batch_size, input_size))
      with variable_scope.variable_scope("basic", initializer=initializer):
        unpacked_inputs = array_ops.unstack(inputs)
        outputs, state = rnn.static_rnn(
            cell, unpacked_inputs, dtype=dtypes.float64)
        packed_outputs = array_ops.stack(outputs)
        basic_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("basic/")
        ]
        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_state = sess.run([packed_outputs, state])
        basic_grads = sess.run(gradients_impl.gradients(packed_outputs, inputs))
        basic_wgrads = sess.run(
            gradients_impl.gradients(packed_outputs, basic_vars))

      with variable_scope.variable_scope(
          "fused_static", initializer=initializer):
        fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
            rnn_cell.BasicRNNCell(10))
        outputs, state = fused_cell(inputs, dtype=dtypes.float64)
        fused_static_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused_static/")
        ]
        sess.run([variables.global_variables_initializer()])
        fused_static_outputs, fused_static_state = sess.run([outputs, state])
        fused_static_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        fused_static_wgrads = sess.run(
            gradients_impl.gradients(outputs, fused_static_vars))

      self.assertAllClose(basic_outputs, fused_static_outputs)
      self.assertAllClose(basic_state, fused_static_state)
      self.assertAllClose(basic_grads, fused_static_grads)
      for basic, fused in zip(basic_wgrads, fused_static_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

      with variable_scope.variable_scope(
          "fused_dynamic", initializer=initializer):
        fused_cell = fused_rnn_cell.FusedRNNCellAdaptor(
            rnn_cell.BasicRNNCell(10), use_dynamic_rnn=True)
        outputs, state = fused_cell(inputs, dtype=dtypes.float64)
        fused_dynamic_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused_dynamic/")
        ]
        sess.run([variables.global_variables_initializer()])
        fused_dynamic_outputs, fused_dynamic_state = sess.run([outputs, state])
        fused_dynamic_grads = sess.run(
            gradients_impl.gradients(outputs, inputs))
        fused_dynamic_wgrads = sess.run(
            gradients_impl.gradients(outputs, fused_dynamic_vars))

      self.assertAllClose(basic_outputs, fused_dynamic_outputs)
      self.assertAllClose(basic_state, fused_dynamic_state)
      self.assertAllClose(basic_grads, fused_dynamic_grads)
      for basic, fused in zip(basic_wgrads, fused_dynamic_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)
Example No. 50
0
  def testLSTMFusedSequenceLengths(self):
    """Verify proper support for sequence lengths in LSTMBlockFusedCell."""
    with self.test_session(use_gpu=True) as sess:
      batch_size = 3
      input_size = 4
      cell_size = 5
      max_sequence_length = 6

      inputs = []
      for _ in range(max_sequence_length):
        inp = ops.convert_to_tensor(
            np.random.randn(batch_size, input_size), dtype=dtypes.float32)
        inputs.append(inp)
      seq_lengths = constant_op.constant([3, 4, 5])

      initializer = init_ops.random_uniform_initializer(
          -0.01, 0.01, seed=19890213)
      with variable_scope.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
        outputs, state = rnn.static_rnn(
            cell, inputs, dtype=dtypes.float32, sequence_length=seq_lengths)
        sess.run([variables.global_variables_initializer()])
        basic_outputs, basic_state = sess.run([outputs, state[0]])
        basic_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        basic_wgrads = sess.run(
            gradients_impl.gradients(outputs, variables.trainable_variables()))

      with variable_scope.variable_scope("fused", initializer=initializer):
        cell = lstm_ops.LSTMBlockFusedCell(
            cell_size, cell_clip=0, use_peephole=False)
        outputs, state = cell(
            inputs, dtype=dtypes.float32, sequence_length=seq_lengths)

        sess.run([variables.global_variables_initializer()])
        fused_outputs, fused_state = sess.run([outputs, state[0]])
        fused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        fused_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("fused/")
        ]
        fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars))

      self.assertAllClose(basic_outputs, fused_outputs)
      self.assertAllClose(basic_state, fused_state)
      self.assertAllClose(basic_grads, fused_grads)
      for basic, fused in zip(basic_wgrads, fused_wgrads):
        self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2)

      # Verify that state propagation works if we turn our sequence into
      # tiny (single-time) subsequences, i.e. unfuse the cell.
      with variable_scope.variable_scope(
          "unfused", initializer=initializer) as vs:
        cell = lstm_ops.LSTMBlockFusedCell(
            cell_size, cell_clip=0, use_peephole=False)
        outputs = []
        state = None
        for i, inp in enumerate(inputs):
          lengths = [int(i < l) for l in seq_lengths.eval()]
          output, state = cell(
              [inp],
              initial_state=state,
              dtype=dtypes.float32,
              sequence_length=lengths)
          vs.reuse_variables()
          outputs.append(output[0])
        outputs = array_ops.stack(outputs)

        sess.run([variables.global_variables_initializer()])
        unfused_outputs, unfused_state = sess.run([outputs, state[0]])
        unfused_grads = sess.run(gradients_impl.gradients(outputs, inputs))
        unfused_vars = [
            v for v in variables.trainable_variables()
            if v.name.startswith("unfused/")
        ]
        unfused_wgrads = sess.run(
            gradients_impl.gradients(outputs, unfused_vars))

      self.assertAllClose(basic_outputs, unfused_outputs)
      self.assertAllClose(basic_state, unfused_state)
      self.assertAllClose(basic_grads, unfused_grads)
      for basic, unfused in zip(basic_wgrads, unfused_wgrads):
        self.assertAllClose(basic, unfused, rtol=1e-2, atol=1e-2)