def __init__(self, hidden_size, keep_prob):
     self.hidden_size = hidden_size  # this should be 2 * self.FLAGS.hidden_size
     self.keep_prob = keep_prob
     # Integer division so each direction's LSTMCell gets an int num_units.
     self.fwd = DropoutWrapper(rnn_cell.LSTMCell(self.hidden_size // 2),
                               input_keep_prob=self.keep_prob)
     self.back = DropoutWrapper(rnn_cell.LSTMCell(self.hidden_size // 2),
                                input_keep_prob=self.keep_prob)
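As a rough guide to how a pair of dropout-wrapped cells like these is usually driven, here is a minimal sketch assuming graph-mode TF 1.x; the run_encoder helper and its input tensors are hypothetical, not part of the original class:

# Hypothetical usage sketch (not from the original repo); assumes TF 1.x graph mode.
import tensorflow as tf

def run_encoder(encoder, inputs, masks):
    """inputs: (batch, seq_len, input_size) floats; masks: (batch, seq_len) 0/1 ints."""
    input_lens = tf.reduce_sum(masks, axis=1)  # true length of each sequence
    # Run the forward and backward cells over the batch-major input.
    (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
        encoder.fwd, encoder.back, inputs,
        sequence_length=input_lens, dtype=tf.float32)
    # Concatenating both directions restores hidden_size features per step.
    out = tf.concat([fw_out, bw_out], axis=2)
    return tf.nn.dropout(out, encoder.keep_prob)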
Example #2
    def compute_states(self, emb):
        def unpack_sequence(tensor):
            return tf.unpack(tf.transpose(tensor, perm=[1, 0, 2]))

        with tf.variable_scope("Composition",
                               initializer=tf.contrib.layers.xavier_initializer(),
                               regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
            cell_fw = rnn_cell.LSTMCell(self.hidden_dim)
            cell_bw = rnn_cell.LSTMCell(self.hidden_dim)
            cell_fw = rnn_cell.DropoutWrapper(cell_fw,
                                              input_keep_prob=self.dropout,
                                              output_keep_prob=self.dropout)
            cell_bw = rnn_cell.DropoutWrapper(cell_bw,
                                              input_keep_prob=self.dropout,
                                              output_keep_prob=self.dropout)

            #output, state = rnn.dynamic_rnn(cell, emb, sequence_length=self.lngths, dtype=tf.float32)
            outputs, _, _ = rnn.bidirectional_rnn(cell_fw, cell_bw,
                                                  unpack_sequence(emb),
                                                  sequence_length=self.lngths,
                                                  dtype=tf.float32)
            #output = pack_sequence(outputs)

        # Mean-pool the per-step outputs over the true sequence lengths.
        sum_out = tf.reduce_sum(tf.stack(outputs), [0])
        sent_rep = tf.div(sum_out, tf.expand_dims(tf.to_float(self.lngths), 1))
        final_state = sent_rep
        return final_state
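compute_states leans on several instance attributes; below is a hedged sketch of the constructor state it appears to expect (the class name and concrete values are guesses, only the attribute names come from the method body):

# Illustrative constructor state assumed by compute_states(); values are placeholders.
class SentenceEncoder(object):              # hypothetical class name
    def __init__(self, hidden_dim=150, reg=1e-4, dropout=0.9, lngths=None):
        self.hidden_dim = hidden_dim        # LSTM units per direction
        self.reg = reg                      # L2 regularization strength
        self.dropout = dropout              # keep probability for DropoutWrapper
        self.lngths = lngths                # int tensor of true sequence lengths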
Example #3
def RNN(x, is_training, weights, biases):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_time_step, x)

    lstm_cell_1 = rnn_cell.LSTMCell(n_hidden_1, forget_bias=0.8)
    lstm_cell_2 = rnn_cell.LSTMCell(n_hidden_2, forget_bias=0.8)

    if is_training and keep_prob < 1:
        lstm_cell_1 = rnn_cell.DropoutWrapper(lstm_cell_1,
                                              output_keep_prob=keep_prob)
        lstm_cell_2 = rnn_cell.DropoutWrapper(lstm_cell_2,
                                              output_keep_prob=keep_prob)

    cell = rnn_cell.MultiRNNCell([lstm_cell_1, lstm_cell_2])

    #if is_training and keep_prob < 1:
    #    x = tf.nn.dropout(x,keep_prob)

    #initial_state = cell.zero_state(batch_size,tf.float32)
    #state = initial_state
    output, states = rnn.rnn(cell, x, dtype=tf.float32)
    #outputs = tf.reshape(tf.concat(1,output),[-1,n_hidden_2])
    #maybe a softmax
    return tf.matmul(output[-1], weights['out']) + biases['out']
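RNN() above relies on several module-level names; here is a hedged sketch of that surrounding setup, with purely illustrative values:

# Illustrative globals assumed by RNN(); the concrete values are guesses.
import tensorflow as tf

n_input, n_time_step = 28, 28              # e.g. one image row per time step
n_hidden_1, n_hidden_2 = 128, 64
n_classes = 10
keep_prob = 0.5                            # dropout keep probability during training

weights = {'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

x = tf.placeholder(tf.float32, [None, n_time_step, n_input])
pred = RNN(x, is_training=True, weights=weights, biases=biases)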
Example #4
def bidir_lstm(x):
    with tf.name_scope('Weights'):
        # Permuting batch_size and n_steps
        #x = tf.transpose(x, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, model.num_features])
        # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(0, model.max_timesteps, x)

        weights_out1 = tf.Variable(tf.truncated_normal([2, num_hidden], stddev=np.sqrt(1./num_hidden)), name='weights')
        biases_out1 = tf.Variable(tf.zeros(num_hidden), name='biases')
        weights_out2 = tf.Variable(tf.truncated_normal([2, num_hidden], stddev=np.sqrt(1./num_hidden)), name='weights')
        biases_out2 = tf.Variable(tf.zeros(num_hidden), name='biases')

    with tf.name_scope('LSTM'):
        forward = rnn_cell.LSTMCell(num_hidden, use_peepholes=True, forget_bias=1.0)
        backward = rnn_cell.LSTMCell(num_hidden, use_peepholes=True, forget_bias=1.0)

    with tf.name_scope('BidirectionalRNN'):
        bidirectional_h1, _, _ = bidirectional_rnn(forward, backward, x, dtype=tf.float32)
        bd_h1s = [tf.reshape(t, [batch_size, 2, num_hidden]) for t in bidirectional_h1]

    with tf.name_scope('logits'):
        weights_class = tf.Variable(tf.truncated_normal([num_hidden, model.num_classes], stddev=np.sqrt(1./num_hidden)), name='weights')
        biases_class = tf.Variable(tf.zeros([model.num_classes]))
        out_h1 = [tf.reduce_sum(tf.mul(t, weights_out1), reduction_indices=1) + biases_out1 for t in bd_h1s]
        logits = [tf.matmul(t, weights_class) + biases_class for t in out_h1]

        logits3d = tf.pack(logits)

    return logits3d
Example #5
    def _createStackBidirectionalDynamicRNN(self,
                                            use_gpu,
                                            use_shape,
                                            use_state_tuple,
                                            initial_states_fw=None,
                                            initial_states_bw=None,
                                            scope=None):
        self.layers = [2, 3]
        input_size = 5
        batch_size = 2
        max_length = 8

        initializer = init_ops.random_uniform_initializer(-0.01,
                                                          0.01,
                                                          seed=self._seed)
        sequence_length = array_ops.placeholder(dtypes.int64)

        self.cells_fw = [
            rnn_cell.LSTMCell(num_units,
                              input_size,
                              initializer=initializer,
                              state_is_tuple=False)
            for num_units in self.layers
        ]
        self.cells_bw = [
            rnn_cell.LSTMCell(num_units,
                              input_size,
                              initializer=initializer,
                              state_is_tuple=False)
            for num_units in self.layers
        ]

        inputs = max_length * [
            array_ops.placeholder(
                dtypes.float32,
                shape=(batch_size, input_size) if use_shape else
                (None, input_size))
        ]
        inputs_c = array_ops.stack(inputs)
        inputs_c = array_ops.transpose(inputs_c, [1, 0, 2])
        outputs, st_fw, st_bw = contrib_rnn.stack_bidirectional_dynamic_rnn(
            self.cells_fw,
            self.cells_bw,
            inputs_c,
            initial_states_fw=initial_states_fw,
            initial_states_bw=initial_states_bw,
            dtype=dtypes.float32,
            sequence_length=sequence_length,
            scope=scope)

        # Outputs has shape (batch_size, max_length, 2 * self.layers[-1]).
        output_shape = [None, max_length, 2 * self.layers[-1]]
        if use_shape:
            output_shape[0] = batch_size

        self.assertAllEqual(outputs.get_shape().as_list(), output_shape)

        input_value = np.random.randn(batch_size, input_size)

        return input_value, inputs, outputs, st_fw, st_bw, sequence_length
Example #6
 def bilstm_layer(self, inputs):
     # Bidirectional LSTM layer for feature extraction
     with tf.variable_scope("BiLSTM"):
         fw_cell = rnn_cell.LSTMCell(self.params.word_hidden_dim,
                                     use_peepholes=True,
                                     initializer=self.initializer())
         bw_cell = rnn_cell.LSTMCell(self.params.word_hidden_dim,
                                     use_peepholes=True,
                                     initializer=self.initializer())
         length64 = tf.cast(self.lengths, tf.int64)
         forward_output, _ = tf.nn.dynamic_rnn(fw_cell,
                                               inputs,
                                               dtype=tf.float32,
                                               sequence_length=self.lengths,
                                               scope="fw")
         backward_output, _ = tf.nn.dynamic_rnn(
             bw_cell,
             tf.reverse_sequence(inputs, length64, seq_dim=1),
             dtype=tf.float32,
             sequence_length=self.lengths,
             scope="bw")
         backward_output = tf.reverse_sequence(backward_output,
                                               length64,
                                               seq_dim=1)
         # concat forward and backward outputs into a 2*hiddenSize vector
         outputs = tf.concat(2, [forward_output, backward_output])
         lstm_features = tf.reshape(outputs,
                                    [-1, self.params.word_hidden_dim * 2])
         return lstm_features
Example #7
    def __init__(self, hidden_size, keep_prob):
        """
        Inputs:
          hidden_size: int. Hidden size of the RNN
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
        """
        self.hidden_size = hidden_size
        self.keep_prob = keep_prob

        #self.rnn_cell_fw = rnn_cell.GRUCell(self.hidden_size)
        self.rnn_cell_fw_layer1 = rnn_cell.LSTMCell(self.hidden_size)
        self.rnn_cell_fw_layer1 = DropoutWrapper(
            self.rnn_cell_fw_layer1, input_keep_prob=self.keep_prob)

        #self.rnn_cell_bw = rnn_cell.GRUCell(self.hidden_size)
        self.rnn_cell_bw_layer1 = rnn_cell.LSTMCell(self.hidden_size)
        self.rnn_cell_bw_layer1 = DropoutWrapper(
            self.rnn_cell_bw_layer1, input_keep_prob=self.keep_prob)

        self.rnn_cell_fw_layer2 = rnn_cell.LSTMCell(self.hidden_size)
        self.rnn_cell_fw_layer2 = DropoutWrapper(
            self.rnn_cell_fw_layer2, input_keep_prob=self.keep_prob)

        self.rnn_cell_bw_layer2 = rnn_cell.LSTMCell(self.hidden_size)
        self.rnn_cell_bw_layer2 = DropoutWrapper(
            self.rnn_cell_bw_layer2, input_keep_prob=self.keep_prob)
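Below is a minimal sketch of how the two layers of cells above are commonly chained; the build_graph method, scope names, and input tensors are assumptions, not from the original repo, and a module-level import tensorflow as tf is taken for granted:

    # Hypothetical two-layer BiLSTM pass over the cells defined in __init__ above.
    def build_graph(self, inputs, input_lens):
        with tf.variable_scope("layer1"):
            (fw1, bw1), _ = tf.nn.bidirectional_dynamic_rnn(
                self.rnn_cell_fw_layer1, self.rnn_cell_bw_layer1, inputs,
                sequence_length=input_lens, dtype=tf.float32)
            layer1_out = tf.concat([fw1, bw1], axis=2)  # (batch, seq_len, 2*hidden_size)
        with tf.variable_scope("layer2"):
            (fw2, bw2), _ = tf.nn.bidirectional_dynamic_rnn(
                self.rnn_cell_fw_layer2, self.rnn_cell_bw_layer2, layer1_out,
                sequence_length=input_lens, dtype=tf.float32)
            out = tf.concat([fw2, bw2], axis=2)
        return tf.nn.dropout(out, self.keep_prob)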
Example #8
    def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, enc_timesteps, bias_init_vector=None):

        self.dim_image = np.int(dim_image)
        self.dim_embed = np.int(dim_embed)
        self.dim_hidden = np.int(dim_hidden)
        self.batch_size = np.int(batch_size)
        self.n_lstm_steps = np.int(n_lstm_steps)
        self.n_words = np.int(n_words)
        self.enc_timesteps = np.int(enc_timesteps)
        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform(
                [n_words, dim_embed], -0.1, 0.1), name='Wemb')

        self.bemb = self.init_bias(dim_embed, name='bemb')

        self.lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
        self.lstm = rnn_cell.DropoutWrapper(self.lstm, input_keep_prob=1)
        self.lstm = rnn_cell.MultiRNNCell([self.lstm])

        self.back_lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
        self.back_lstm = rnn_cell.DropoutWrapper(self.back_lstm, input_keep_prob=1)
        self.back_lstm = rnn_cell.MultiRNNCell([self.back_lstm])
        self.encode_img_W = tf.Variable(tf.random_uniform(
            [dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
        self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

        self.embed_word_W = tf.Variable(tf.random_uniform(
            [dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')

        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(
                bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
    def BiRNN(x, weights, biases):
        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

        # Permuting batch_size and n_steps
        x = tf.transpose(x, [1, 0, 2])
        # Reshape to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, n_input])
        # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(0, n_steps, x)

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0)

        # Get lstm cell output
        try:
            outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                  dtype=tf.float32)
        except Exception: # Old TensorFlow version only returns outputs not states
            outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                            dtype=tf.float32)

        # Linear activation, using rnn inner loop last output
        return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example #10
def inference(images, eval=False):
    NUM_HIDDEN = 128
    W = _variable_with_weight_decay('weight', [2 * NUM_HIDDEN, NUM_CLASSES],
                                    0.001, 0.01)
    b = _variable_with_weight_decay('bias', [NUM_CLASSES], 0.001, 0)
    s = images.get_shape()
    n_batches, n_steps, n_features = int(s[0]), int(s[1]), int(s[2])
    #print(n_batches, n_steps, n_features)
    # Permute to time-major so each split slice is one time step across the batch.
    inputs = tf.transpose(images, [1, 0, 2])
    inputs = tf.reshape(inputs, [-1, n_features])
    inputs = tf.split(0, n_steps, inputs)

    fw_cell = rnn_cell.LSTMCell(NUM_HIDDEN,
                                forget_bias=1.0,
                                state_is_tuple=True)
    bw_cell = rnn_cell.LSTMCell(NUM_HIDDEN,
                                forget_bias=1.0,
                                state_is_tuple=True)

    try:
        outputs, _, _ = rnn.bidirectional_rnn(fw_cell,
                                              bw_cell,
                                              inputs,
                                              dtype=tf.float32)
    except Exception:  # older TF versions return only the outputs
        outputs = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32)
    logits = tf.matmul(outputs[-1], W) + b
    return logits
    def __init__(self,
                 hidden_size,
                 keep_prob,
                 cell_type="gru",
                 scope="encoder"):
        """
        Inputs:
          hidden_size:
                int.
                Hidden size of the RNN
          keep_prob: 
                Tensor
                containing a single scalar that is the keep probability (for dropout)
        """
        self.hidden_size = hidden_size
        self.keep_prob = keep_prob
        if cell_type == "gru":
            rnn_cell_fw = rnn_cell.GRUCell(self.hidden_size)
            rnn_cell_bw = rnn_cell.GRUCell(self.hidden_size)
        elif cell_type == "lstm":
            rnn_cell_fw = rnn_cell.LSTMCell(self.hidden_size)
            rnn_cell_bw = rnn_cell.LSTMCell(self.hidden_size)
        else:
            raise ValueError("No such cell type for RNN encoder!")

        self.rnn_cell_fw = DropoutWrapper(rnn_cell_fw,
                                          input_keep_prob=self.keep_prob)
        self.rnn_cell_bw = DropoutWrapper(rnn_cell_bw,
                                          input_keep_prob=self.keep_prob)
        self.scope = scope
        logger.info("Encoder created: {} | hidden_size = {}".format(
            cell_type, hidden_size))
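Instantiation is then a one-liner; the class name RNNEncoder below is a guess, since the snippet does not show it:

# Hypothetical instantiation of the encoder class defined above.
keep_prob = tf.placeholder_with_default(1.0, shape=())  # 1.0 at eval time, <1.0 for training
encoder = RNNEncoder(hidden_size=200, keep_prob=keep_prob,
                     cell_type="lstm", scope="context_encoder")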
Example #12
def LSTM_Network(input_, weightsOutH1, weightsClasses, biasesOutH1,
                 biasesClasses):
    ####Network
    forwardH1 = rnn_cell.LSTMCell(nHidden,
                                  use_peepholes=True,
                                  state_is_tuple=True)
    backwardH1 = rnn_cell.LSTMCell(nHidden,
                                   use_peepholes=True,
                                   state_is_tuple=True)
    fbH1, _, _ = bidirectional_rnn(forwardH1,
                                   backwardH1,
                                   inputList,
                                   dtype=tf.float32,
                                   scope='BDLSTM_H1')
    fbH1rs = [tf.reshape(t, [batchSize, 2, nHidden]) for t in fbH1]
    outH1 = [
        tf.reduce_sum(tf.mul(t, weightsOutH1), reduction_indices=1) +
        biasesOutH1 for t in fbH1rs
    ]

    logits = [tf.matmul(t, weightsClasses) + biasesClasses for t in outH1]

    ####Optimizing
    logits3d = tf.pack(logits)
    return logits3d
Example #13
    def _createStackBidirectionalRNN(self,
                                     use_gpu,
                                     use_shape,
                                     use_sequence_length,
                                     initial_states_fw=None,
                                     initial_states_bw=None,
                                     scope=None):
        self.layers = [2, 3]
        input_size = 5
        batch_size = 2
        max_length = 8

        initializer = init_ops.random_uniform_initializer(-0.01,
                                                          0.01,
                                                          seed=self._seed)
        sequence_length = array_ops.placeholder(
            dtypes.int64) if use_sequence_length else None

        self.cells_fw = [
            rnn_cell.LSTMCell(num_units,
                              input_size,
                              initializer=initializer,
                              state_is_tuple=False)
            for num_units in self.layers
        ]
        self.cells_bw = [
            rnn_cell.LSTMCell(num_units,
                              input_size,
                              initializer=initializer,
                              state_is_tuple=False)
            for num_units in self.layers
        ]

        inputs = max_length * [
            array_ops.placeholder(
                dtypes.float32,
                shape=(batch_size, input_size) if use_shape else
                (None, input_size))
        ]
        outputs, state_fw, state_bw = contrib_rnn.stack_bidirectional_rnn(
            self.cells_fw,
            self.cells_bw,
            inputs,
            initial_states_fw,
            initial_states_bw,
            dtype=dtypes.float32,
            sequence_length=sequence_length,
            scope=scope)

        self.assertEqual(len(outputs), len(inputs))
        for out in outputs:
            self.assertAlmostEqual(
                out.get_shape().as_list(),
                [batch_size if use_shape else None, 2 * self.layers[-1]])

        input_value = np.random.randn(batch_size, input_size)
        outputs = array_ops.stack(outputs)

        return input_value, inputs, outputs, state_fw, state_bw, sequence_length
Example #14
def RNN(x, weight, bias):
    cell1 = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell2 = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell = rnn_cell.MultiRNNCell([cell1, cell2])
    output, state = tf.nn.dynamic_rnn(cell, x, dtype = tf.float32)
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    return tf.nn.softmax(tf.matmul(last, weight) + bias, name="pred")
def get_train_model():
    x, y, params = get_training_model()

    inputs = tf.placeholder(tf.float32, [None, None, common.OUTPUT_SHAPE[0]])

    # Here we use sparse_placeholder that will generate a
    # SparseTensor required by ctc_loss op.
    targets = tf.sparse_placeholder(tf.int32)

    # 1d array of size [batch_size]
    seq_len = tf.placeholder(tf.int32, [None])

    # Defining the cell for forward and backward layer
    forwardH1 = rnn_cell.LSTMCell(common.num_hidden,
                                  use_peepholes=True,
                                  state_is_tuple=True)
    backwardH1 = rnn_cell.LSTMCell(common.num_hidden,
                                   use_peepholes=True,
                                   state_is_tuple=True)

    # The second output is the previous state and is ignored
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(forwardH1,
                                                 backwardH1,
                                                 x,
                                                 seq_len,
                                                 dtype=tf.float32)
    outputs = tf.concat(2, outputs)
    shape = tf.shape(inputs)

    batch_s, max_timesteps = shape[0], shape[1]
    weights = tf.Variable(tf.truncated_normal(
        [common.num_hidden, common.num_classes], stddev=0.4),
                          name="weights")
    # Reshaping to apply the same weights over the timesteps
    outputs = tf.reshape(outputs, [-1, 2 * common.num_hidden])

    # Truncated normal with mean 0 and stdev=0.5
    W = tf.Variable(tf.truncated_normal(
        [2 * common.num_hidden, common.num_classes], stddev=0.5),
                    name="W")

    # Zero initialization
    b = tf.zeros(shape=[common.num_classes], name='b')

    # Doing the affine projection
    logits = tf.matmul(outputs, W) + b

    # Reshaping back to the original shape
    logits = tf.reshape(logits, [batch_s, -1, common.num_classes])

    # Time major
    logits = tf.transpose(logits, (1, 0, 2))

    return logits, inputs, targets, seq_len, W, b
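Downstream, the returned tensors are typically wired into a CTC loss; here is a hedged sketch of that training setup (optimizer choice and learning rate are illustrative, not from the original repo):

# Hedged sketch of a CTC training graph built on get_train_model()'s outputs.
logits, inputs, targets, seq_len, W, b = get_train_model()

# logits are already time-major, which tf.nn.ctc_loss expects by default.
loss = tf.nn.ctc_loss(targets, logits, seq_len)
cost = tf.reduce_mean(loss)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)

# Greedy decoding plus label error rate for monitoring.
decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)
ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))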
Example #16
    def __init__(self, seq_length, vocab_size, stack_dimension, batch_size):
        config = tf.ConfigProto(allow_soft_placement=True)
        self.sess = tf.Session(config=config)

        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.memory_dim = vocab_size

        self.enc_inp = [
            tf.placeholder(tf.float32,
                           shape=(vocab_size, batch_size),
                           name="enc_inp%i" % t) for t in range(seq_length)
        ]

        self.dec_inp = self.enc_inp[:-1] + [
            tf.zeros_like(self.enc_inp[0], dtype=np.float32, name="GO")
        ]

        single_enc_cell = rnn_cell.LSTMCell(self.memory_dim,
                                            state_is_tuple=False)
        self.enc_cell = rnn_cell.MultiRNNCell([single_enc_cell] *
                                              stack_dimension,
                                              state_is_tuple=True)
        _, encoder_state = rnn.rnn(self.enc_cell,
                                   self.enc_inp,
                                   dtype=tf.float32)

        single_dec_cell = rnn_cell.LSTMCell(self.memory_dim,
                                            state_is_tuple=False)
        self.dec_cell = rnn_cell.MultiRNNCell([single_dec_cell] *
                                              stack_dimension,
                                              state_is_tuple=True)

        self.Ws = tf.Variable(
            tf.random_uniform([self.memory_dim, self.vocab_size], 0, 0.1))
        self.bs = tf.Variable(tf.random_uniform([self.vocab_size], -0.1, 0.1))

        self.dec_outputs, self.dec_state = rnn_decoder(
            self.dec_inp, encoder_state, self.dec_cell, self.Ws, self.bs,
            vocab_size, batch_size, self.memory_dim)

        self.labels = [
            tf.placeholder(tf.float32, [vocab_size, batch_size],
                           name='LABEL%i' % t) for t in range(seq_length)
        ]
        self.weights = [
            tf.ones_like(labels_t, dtype=tf.float32)
            for labels_t in self.labels
        ]
        self.loss = loss(self.labels, self.dec_outputs)

        self.train_op = tf.train.AdamOptimizer(1e-3).minimize(self.loss)
        self.sess.run(tf.initialize_all_variables())
Example #17
 def __init__(self, hidden_size, keep_prob,model_name="RNNModelEncoder"):
     """
     Inputs:
       hidden_size: int. Hidden size of the RNN
       keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
     """
     self.hidden_size = hidden_size
     self.keep_prob = keep_prob
     self.rnn_cell_fw = rnn_cell.LSTMCell(self.hidden_size)
     self.rnn_cell_fw = DropoutWrapper(self.rnn_cell_fw, input_keep_prob=self.keep_prob)
     self.rnn_cell_bw = rnn_cell.LSTMCell(self.hidden_size)
     self.rnn_cell_bw = DropoutWrapper(self.rnn_cell_bw, input_keep_prob=self.keep_prob)
     self.model_name=model_name
Example #18
    def __init__(self, input_size, output_size):
        """
        Inputs:
          input_size: the dimension of the input states
          output_size: the dimension of the output states for each direction
            of the BiLSTM.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.rnn_cell_fw = rnn_cell.LSTMCell(num_units=self.input_size,
                                             num_proj=self.output_size)
        self.rnn_cell_bw = rnn_cell.LSTMCell(num_units=self.input_size,
                                             num_proj=self.output_size)
Example #19
def inference(inputs, n_input, n_steps, n_hidden, n_classes):
	W = tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
	b = tf.Variable(tf.random_normal([n_classes]))
	inputs = tf.reshape(inputs, [-1, n_input])
	inputs = tf.split(0, n_steps, inputs)

	fw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias = 1.0, state_is_tuple = True)
	bw_cell = rnn_cell.LSTMCell(n_hidden, forget_bias = 1.0, state_is_tuple = True)

	try:
		outputs,_,_ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype = tf.float32)
	except Exception:  # older TF versions return only the outputs
		outputs = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype = tf.float32)
	return tf.matmul(outputs[-1], W) + b
Example #20
 def __init__(self, keep_prob, key_vec_size, value_vec_size):
     """
     Inputs:
       keep_prob: tensor containing a single scalar that is the keep probability (for dropout)
       key_vec_size: size of the key vectors. int
       value_vec_size: size of the value vectors. int
     """
     self.keep_prob = keep_prob
     self.key_vec_size = key_vec_size
     self.value_vec_size = value_vec_size
     # Integer division keeps num_units an int (value_vec_size / 2 is a float under Python 3).
     self.rnn_cell_fw = rnn_cell.LSTMCell(value_vec_size // 2, reuse=tf.AUTO_REUSE)
     self.rnn_cell_fw = DropoutWrapper(self.rnn_cell_fw, input_keep_prob=self.keep_prob)
     self.rnn_cell_bw = rnn_cell.LSTMCell(value_vec_size // 2, reuse=tf.AUTO_REUSE)
     self.rnn_cell_bw = DropoutWrapper(self.rnn_cell_bw, input_keep_prob=self.keep_prob)
Example #21
def BRNN(x, weight, bias):
    cell1_fw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell2_fw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell_fw = rnn_cell.MultiRNNCell([cell1_fw, cell2_fw])

    cell1_bw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell2_bw = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    cell_bw = rnn_cell.MultiRNNCell([cell1_bw, cell2_bw])

    output, out_states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x, dtype = tf.float32)
    # print(output[-1].get_shape().as_list())
    output = tf.transpose(output[-1], [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    return tf.nn.softmax(tf.matmul(last, weight) + bias, name="pred")
Example #22
 def __init__(self, keep_prob, qn_vec_size, cxt_vec_size):
     """
     Inputs:
         keep_prob: tensor containing a single scalar that is the keep probability (for dropout)
         qn_vec_size: size of the question vectors. int
         cxt_vec_size: size of the context vectors. int
     """
     self.keep_prob = keep_prob
     self.qn_vec_size = qn_vec_size
     self.cxt_vec_size = cxt_vec_size
     # Integer division keeps num_units an int (cxt_vec_size / 2 is a float under Python 3).
     self.rnn_cell_fw = rnn_cell.LSTMCell(cxt_vec_size // 2, reuse=tf.AUTO_REUSE)
     self.rnn_cell_fw = DropoutWrapper(self.rnn_cell_fw, input_keep_prob=self.keep_prob)
     self.rnn_cell_bw = rnn_cell.LSTMCell(cxt_vec_size // 2, reuse=tf.AUTO_REUSE)
     self.rnn_cell_bw = DropoutWrapper(self.rnn_cell_bw, input_keep_prob=self.keep_prob)
Example #23
    def build_graph(self):
        input_lens = tf.reduce_sum(self.X_mask_placeholder, axis=1)
        inputs = self.X_placeholder

        char_embedding = self.convolve()
        inputs = tf.concat([self.X_placeholder, char_embedding], axis=-1)
        inputs = tf.concat([inputs, self.features], axis=-1)

        for i in range(0, self.depth):
            lstm_cell_forward = rnn_cell.LSTMCell(self.hidden_size)
            lstm_cell_forward = DropoutWrapper(lstm_cell_forward,
                                               input_keep_prob=self.keep_prob)
            lstm_cell_backward = rnn_cell.LSTMCell(self.hidden_size)
            lstm_cell_backward = DropoutWrapper(lstm_cell_backward,
                                                input_keep_prob=self.keep_prob)
            (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
                lstm_cell_forward,
                lstm_cell_backward,
                inputs,
                input_lens,
                dtype=tf.float32,
                scope='layer' + str(i))
            out = tf.concat([fw_out, bw_out], 2)
            out = tf.nn.dropout(out, self.keep_prob)
            inputs = out

        h = tf.contrib.layers.fully_connected(out,
                                              num_outputs=self.hidden_size,
                                              activation_fn=tf.nn.relu)

        rows = tf.range(0, tf.shape(input_lens)[-1])
        indices = tf.subtract(input_lens, tf.ones_like(input_lens))
        indices = tf.nn.relu(indices)
        slicer = tf.stack([rows, indices], axis=1)

        h = tf.gather_nd(h, slicer)

        weights = tf.get_variable(
            "W",
            shape=[self.hidden_size, self.num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        bias = tf.get_variable("b",
                               shape=[self.num_classes],
                               initializer=tf.zeros_initializer())

        logits = tf.nn.xw_plus_b(h, weights, bias, name="logits")

        preds = tf.argmax(logits, 1)

        return logits, preds
Example #24
	def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size, dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
		self.rnn_size = rnn_size
		self.rnn_layer = rnn_layer
		self.batch_size = batch_size
		self.input_embedding_size = input_embedding_size
		self.dim_image = dim_image
		self.dim_hidden = dim_hidden
		self.max_words_q = max_words_q
		self.vocabulary_size = vocabulary_size
		self.drop_out_rate = drop_out_rate

		# Before-LSTM-embedding
		self.embed_BLSTM_Q_W = tf.Variable(tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08), name='embed_BLSTM_Q_W')
		self.embed_BLSTM_A_W = tf.Variable(tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08), name='embed_BLSTM_A_W')

		# encoder: RNN body
		self.lstm_1_q = rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True,state_is_tuple=False)
		self.lstm_dropout_1_q = rnn_cell.DropoutWrapper(self.lstm_1_q, output_keep_prob = 1 - self.drop_out_rate)
		self.lstm_2_q = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True,state_is_tuple=False)
		self.lstm_dropout_2_q = rnn_cell.DropoutWrapper(self.lstm_2_q, output_keep_prob = 1 - self.drop_out_rate)
		self.stacked_lstm_q = rnn_cell.MultiRNNCell([self.lstm_dropout_1_q, self.lstm_dropout_2_q],state_is_tuple=False)

		self.lstm_1_a = rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True,state_is_tuple=False)
		self.lstm_dropout_1_a = rnn_cell.DropoutWrapper(self.lstm_1_a, output_keep_prob = 1 - self.drop_out_rate)
		self.lstm_2_a = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True,state_is_tuple=False)
		self.lstm_dropout_2_a = rnn_cell.DropoutWrapper(self.lstm_2_a, output_keep_prob = 1 - self.drop_out_rate)
		self.stacked_lstm_a = rnn_cell.MultiRNNCell([self.lstm_dropout_1_a, self.lstm_dropout_2_a],state_is_tuple=False)

		# question-embedding W1
		self.embed_Q_W = tf.Variable(tf.random_uniform([2*rnn_size*rnn_layer, self.dim_hidden], -0.08,0.08),name='embed_Q_W')
		self.embed_Q_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_Q_b')
		
		# Answer-embedding W3
		self.embed_A_W = tf.Variable(tf.random_uniform([2*rnn_size*rnn_layer, self.dim_hidden], -0.08,0.08),name='embed_A_W')
		self.embed_A_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_A_b')

		# image-embedding W2
		self.embed_image_W = tf.Variable(tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08), name='embed_image_W')
		self.embed_image_b = tf.Variable(tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_image_b')

		# score-embedding W4
		#self.embed_scor_W = tf.Variable(tf.random_uniform([dim_hidden, num_output], -0.08, 0.08), name='embed_scor_W')
		#self.embed_scor_b = tf.Variable(tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')
		self.embed_scor_W = tf.Variable(tf.random_uniform([dim_hidden, num_output], -0.08, 0.08), name='embed_scor_W')
		self.embed_scor_b = tf.Variable(tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')

		# QI-embedding W3
		self.embed_QI_W = tf.Variable(tf.random_uniform([dim_hidden, dim_hidden], -0.08, 0.08), name='embed_QI_W')
		self.embed_QI_b = tf.Variable(tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_QI_b')
Example #25
    def _create_encoder(self, args):
        # Create LSTM portion of network
        lstm = rnn_cell.LSTMCell(args.encoder_size,
                                 state_is_tuple=True,
                                 initializer=initializers.xavier_initializer())
        self.full_lstm = rnn_cell.MultiRNNCell([lstm] *
                                               args.num_encoder_layers,
                                               state_is_tuple=True)
        self.lstm_state = self.full_lstm.zero_state(args.batch_size,
                                                    tf.float32)

        # Forward pass
        encoder_input = tf.concat(1, [self.states_encode, self.actions_encode])
        output, self.final_state = seq2seq.rnn_decoder([encoder_input],
                                                       self.lstm_state,
                                                       self.full_lstm)
        output = tf.reshape(tf.concat(1, output), [-1, args.encoder_size])

        # Fully connected layer to latent variable distribution parameters
        W = tf.get_variable("latent_w", [args.encoder_size, 2 * args.z_dim],
                            initializer=initializers.xavier_initializer())
        b = tf.get_variable("latent_b", [2 * args.z_dim])
        logits = tf.nn.xw_plus_b(output, W, b)

        # Separate into mean and logstd
        self.z_mean, self.z_logstd = tf.split(1, 2, logits)
Example #26
    def test_temporal_classification_sequential_tf_rnn(self):
        with self.cached_session():
            np.random.seed(1337)
            (x_train,
             y_train), _ = testing_utils.get_test_data(train_samples=100,
                                                       test_samples=0,
                                                       input_shape=(4, 10),
                                                       num_classes=2)
            y_train = keras.utils.to_categorical(y_train)

            model = keras.models.Sequential()
            model.add(
                keras.layers.RNN(rnn_cell.LSTMCell(5),
                                 return_sequences=True,
                                 input_shape=x_train.shape[1:]))
            model.add(
                keras.layers.RNN(
                    rnn_cell.GRUCell(y_train.shape[-1],
                                     activation='softmax',
                                     dtype=dtypes.float32)))
            model.compile(loss='categorical_crossentropy',
                          optimizer=keras.optimizers.Adam(lr=0.1),
                          metrics=['accuracy'])
            history = model.fit(x_train,
                                y_train,
                                epochs=15,
                                batch_size=16,
                                validation_data=(x_train, y_train),
                                verbose=2)
            self.assertGreater(history.history['val_acc'][-1], 0.7)
Example #27
  def testCustomizedAttention(self):
    batch_size = 2
    max_time = 3
    num_units = 2
    memory = constant_op.constant([[[1., 1.], [2., 2.], [3., 3.]],
                                   [[4., 4.], [5., 5.], [6., 6.]]])
    memory_sequence_length = constant_op.constant([3, 2])
    attention_mechanism = wrapper.BahdanauAttention(num_units, memory,
                                                    memory_sequence_length)

    # Sets all returned values to be all ones.
    def _customized_attention(unused_attention_mechanism, unused_cell_output,
                              unused_attention_state, unused_attention_layer):
      """Customized attention.

      Returns:
        attention: `Tensor` of shape [batch_size, num_units], attention output.
        alignments: `Tensor` of shape [batch_size, max_time], sigma value for
          each input memory (prob. function of input keys).
        next_attention_state: A `Tensor` representing the next state for the
          attention.
      """
      attention = array_ops.ones([batch_size, num_units])
      alignments = array_ops.ones([batch_size, max_time])
      next_attention_state = alignments
      return attention, alignments, next_attention_state

    attention_cell = wrapper.AttentionWrapper(
        rnn_cell.LSTMCell(2),
        attention_mechanism,
        attention_layer_size=None,  # don't use attention layer.
        output_attention=False,
        alignment_history=(),
        attention_fn=_customized_attention,
        name='attention')
    self.assertEqual(num_units, attention_cell.output_size)

    initial_state = attention_cell.zero_state(
        batch_size=2, dtype=dtypes.float32)
    source_input_emb = array_ops.ones([2, 3, 2])
    source_input_length = constant_op.constant([3, 2])

    # 'state' is a tuple of
    # (cell_state, h, attention, alignments, alignment_history, attention_state)
    output, state = rnn.dynamic_rnn(
        attention_cell,
        inputs=source_input_emb,
        sequence_length=source_input_length,
        initial_state=initial_state,
        dtype=dtypes.float32)

    with self.session() as sess:
      sess.run(variables.global_variables_initializer())
      output_value, state_value = sess.run([output, state], feed_dict={})
      self.assertAllEqual(np.array([2, 3, 2]), output_value.shape)
      self.assertAllClose(np.array([[1., 1.], [1., 1.]]), state_value.attention)
      self.assertAllClose(
          np.array([[1., 1., 1.], [1., 1., 1.]]), state_value.alignments)
      self.assertAllClose(
          np.array([[1., 1., 1.], [1., 1., 1.]]), state_value.attention_state)
Example #28
  def testLuongScaledDType(self):
    # Test case for GitHub issue 18099
    for dt in [np.float16, np.float32, np.float64]:
      num_units = 128
      encoder_outputs = array_ops.placeholder(dt, shape=[64, None, 256])
      encoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64])
      decoder_inputs = array_ops.placeholder(dt, shape=[64, None, 128])
      decoder_sequence_length = array_ops.placeholder(dtypes.int32, shape=[64])
      batch_size = 64
      attention_mechanism = wrapper.LuongAttention(
          num_units=num_units,
          memory=encoder_outputs,
          memory_sequence_length=encoder_sequence_length,
          scale=True,
          dtype=dt,
      )
      cell = rnn_cell.LSTMCell(num_units)
      cell = wrapper.AttentionWrapper(cell, attention_mechanism)

      helper = helper_py.TrainingHelper(decoder_inputs,
                                        decoder_sequence_length)
      my_decoder = basic_decoder.BasicDecoder(
          cell=cell,
          helper=helper,
          initial_state=cell.zero_state(
              dtype=dt, batch_size=batch_size))

      final_outputs, final_state, _ = decoder.dynamic_decode(my_decoder)
      self.assertTrue(
          isinstance(final_outputs, basic_decoder.BasicDecoderOutput))
      self.assertEqual(final_outputs.rnn_output.dtype, dt)
      self.assertTrue(
          isinstance(final_state, wrapper.AttentionWrapperState))
      self.assertTrue(
          isinstance(final_state.cell_state, rnn_cell.LSTMStateTuple))
    def lstm(self):
        """
        Prepare the input shape for the LSTM: the original shape is
        (batch_size, seq_size, input_dim), but it must be transformed into a
        tensor list of length seq_size, each of shape (batch_size, input_dim).

        self.X is copied to a new tensor X first.
        """
        X = self.X
        # Permute batch_size and seq_size: (seq_size, batch_size, input_dim).
        X = tf.transpose(X, [1, 0, 2])
        # Reshape to (seq_size * batch_size, input_dim).
        X = tf.reshape(X, [-1, self.input_dim])
        # Split into a list of seq_size tensors of shape (batch_size, input_dim).
        X = tf.split(X, self.seq_size)

        # Create the LSTM cell and add dropout.
        lstm_cell = rnn_cell.LSTMCell(self.hidden_dim, use_peepholes=True)
        lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                            input_keep_prob=self.keep_prob,
                                            output_keep_prob=self.keep_prob)
        # A static RNN consumes the per-step list; dynamic_rnn expects a single tensor.
        outputs, states = rnn.static_rnn(lstm_cell, X, dtype=tf.float32)

        output = tf.matmul(
            outputs[-1],
            self.weights) + self.biases  #the last output and process
        return output
    def benchmarkTfRNNLSTMTraining(self):
        test_configs = self._GetTestConfig()
        for config_name, config in test_configs.items():
            num_layers = config["num_layers"]
            num_units = config["num_units"]
            batch_size = config["batch_size"]
            seq_length = config["seq_length"]

            with ops.Graph().as_default(), ops.device("/gpu:0"):
                inputs = seq_length * [
                    array_ops.zeros([batch_size, num_units], dtypes.float32)
                ]
                initializer = init_ops.random_uniform_initializer(-0.01,
                                                                  0.01,
                                                                  seed=127)

                # MultiRNNCell needs one cell per layer; build them with a small factory
                # (an LSTMCell instance cannot be called with no arguments).
                def make_cell():
                    return rnn_cell.LSTMCell(num_units=num_units,
                                             initializer=initializer,
                                             state_is_tuple=True)
                multi_cell = rnn_cell.MultiRNNCell(
                    [make_cell() for _ in range(num_layers)])
                outputs, final_state = core_rnn.static_rnn(
                    multi_cell, inputs, dtype=dtypes.float32)
                trainable_variables = ops.get_collection(
                    ops.GraphKeys.TRAINABLE_VARIABLES)
                gradients = gradients_impl.gradients([outputs, final_state],
                                                     trainable_variables)
                training_op = control_flow_ops.group(*gradients)
                self._BenchmarkOp(
                    training_op, "tf_rnn_lstm %s %s" %
                    (config_name, self._GetConfigDesc(config)))