def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size,
          _seq_len):

    # BiRNN requires sequence_length to be supplied as an int64 vector of shape [batch_size]
    # Note: TensorFlow 0.6.0 requires the BiRNN sequence_length parameter to be set
    # For a better implementation with a recent version of TensorFlow, see below
    _seq_len = tf.fill([_batch_size], tf.constant(_seq_len, dtype=tf.int64))

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                    lstm_bw_cell,
                                    _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw,
                                    sequence_length=_seq_len)

    # Linear activation
    # Get inner loop last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
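
As a sanity check, here is a minimal NumPy sketch (hypothetical sizes) of the transpose → reshape → split pipeline used above:

import numpy as np

n_steps, batch_size, n_input, n_hidden = 5, 4, 3, 8
X = np.zeros((batch_size, n_steps, n_input))   # (4, 5, 3)
X = X.transpose(1, 0, 2)                       # (5, 4, 3): steps first
X = X.reshape(-1, n_input)                     # (20, 3): merge steps and batch
W = np.zeros((n_input, n_hidden))
X = X.dot(W)                                   # (20, 8): hidden projection
steps = np.split(X, n_steps, axis=0)           # 5 arrays of shape (4, 8)
assert steps[0].shape == (batch_size, n_hidden)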
Example #2
    def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):

        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
        # Reshape to prepare input to hidden activation
        # (n_steps*batch_size, n_input)
        _X = tf.reshape(_X, [-1, self.config.num_input])
        # Linear activation
        _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

        # Forward direction cell
        rnn_fw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
        # Backward direction cell
        rnn_bw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)

        # Split data because rnn cell needs a list of inputs for the RNN inner
        # loop
        # n_steps * (batch_size, n_hidden)
        _X = tf.split(0, self.config.num_steps, _X)

        # Get lstm cell output
        outputs, final_fw, final_bw = rnn.bidirectional_rnn(
            rnn_fw_cell,
            rnn_bw_cell,
            _X,
            initial_state_fw=_istate_fw,
            initial_state_bw=_istate_bw)
        # Linear activation
        return [
            tf.matmul(output, _weights['out']) + _biases['out']
            for output in outputs
        ], final_fw, final_bw
Example #3
def BiRNN(x, weights, biases):

    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                    lstm_bw_cell,
                                    x,
                                    dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
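
A hypothetical call site for the BiRNN above, assuming the legacy tensorflow.models.rnn API and module-level n_steps, n_input, n_hidden, n_classes; the output weights are [2*n_hidden, n_classes] because bidirectional_rnn concatenates the forward and backward outputs:

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
# 2*n_hidden: forward and backward cell outputs are concatenated
weights = {'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}
pred = BiRNN(x, weights, biases)  # logits, shape (batch_size, n_classes)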
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                    lstm_bw_cell,
                                    _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw)
    # Linear activation
    # Get inner loop last output
    output = [tf.matmul(o, _weights['out']) + _biases['out'] for o in outputs]
    return output
    def __init__(self, is_training, glove_word_vectors, vocabulary, config):
        self.size = config.hidden_size
        self.config = config
        self.is_training = is_training
        self.word_vec_size = config.word_vec_size
        vocab_size = config.vocab_size
        self.glove_word_vectors = glove_word_vectors
        self.vocabulary = vocabulary

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.

        # TODO: these might be improved by using LSTMCell, which has other
        # features to improve performance, but that requires sentence_length
        with tf.variable_scope("LeftLSTM"):
            self.left_lstm_cell = rnn_cell.BasicLSTMCell(self.size,
                                                         forget_bias=1.0)
        with tf.variable_scope("RightLSTM"):
            self.right_lstm_cell = rnn_cell.BasicLSTMCell(self.size,
                                                          forget_bias=1.0)
        if is_training and config.keep_prob < 1:
            with tf.variable_scope("LeftLSTM"):
                self.left_lstm_cell = rnn_cell.DropoutWrapper(
                    self.left_lstm_cell, output_keep_prob=config.keep_prob)
            with tf.variable_scope("RightLSTM"):
                self.right_lstm_cell = rnn_cell.DropoutWrapper(
                    self.right_lstm_cell, output_keep_prob=config.keep_prob)

        with tf.variable_scope("LeftLSTM"):
            self.left_lstm_cell = rnn_cell.MultiRNNCell([self.left_lstm_cell] *
                                                        config.num_layers)
        with tf.variable_scope("RightLSTM"):
            self.right_lstm_cell = rnn_cell.MultiRNNCell(
                [self.right_lstm_cell] * config.num_layers)
Example #6
    def __init__(self,
                 batch_size,
                 len_question,
                 len_answer,
                 n_answers,
                 n_words,
                 dim_embed,
                 dim_hidden,
                 bias_init_vector=None):

        self.batch_size = batch_size
        self.len_question = len_question
        self.len_answer = len_answer
        self.n_answers = n_answers
        self.n_words = n_words
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_embed],
                                                      -0.1, 0.1),
                                    name='Wemb')

        self.W_emb_hid_Q = tf.Variable(tf.random_uniform(
            [dim_embed, dim_hidden], -0.1, 0.1),
                                       name='W_emb_hid_Q')
        self.b_emb_hid_Q = tf.Variable(tf.zeros([dim_hidden]),
                                       name='b_emb_hid_Q')

        self.W_emb_hid_A = tf.Variable(tf.random_uniform(
            [dim_embed, dim_hidden], -0.1, 0.1),
                                       name='W_emb_hid_A')
        self.b_emb_hid_A = tf.Variable(tf.zeros([dim_hidden]),
                                       name='b_emb_hid_A')

        self.lstm1 = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm2 = rnn_cell.BasicLSTMCell(dim_hidden)

        self.W_hid_emb = tf.Variable(tf.random_uniform([dim_hidden, dim_embed],
                                                       -0.1, 0.1),
                                     name='W_hid_emb')
        self.b_hid_emb = tf.Variable(tf.zeros([dim_embed]), name='b_hid_emb')

        self.W_emb_word = tf.Variable(tf.random_uniform([dim_embed, n_words],
                                                        -0.1, 0.1),
                                      name='W_emb_word')

        if bias_init_vector is not None:
            self.b_emb_word = tf.Variable(bias_init_vector.astype(
                np.float32),
                                          name='b_emb_word')
        else:
            self.b_emb_word = tf.Variable(tf.zeros([n_words]),
                                          name='b_emb_word')
    def __init__(self,
                 batch_size,
                 len_question,
                 len_answer,
                 n_answers,
                 n_words,
                 dim_embed,
                 dim_hidden,
                 bias_init_vector=None):

        self.batch_size = batch_size
        self.len_question = len_question
        self.len_answer = len_answer
        self.n_answers = n_answers
        self.n_words = n_words
        self.dim_embed = dim_embed
        self.dim_hidden = dim_hidden

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_embed],
                                                      -0.1, 0.1),
                                    name='Wemb')

        self.W_emb_hid_Q = tf.Variable(tf.random_uniform(
            [dim_embed, dim_hidden], -0.1, 0.1),
                                       name='W_emb_hid_Q')
        self.b_emb_hid_Q = tf.Variable(tf.zeros([dim_hidden]),
                                       name='b_emb_hid_Q')

        self.W_emb_hid_A = tf.Variable(tf.random_uniform(
            [dim_embed, dim_hidden], -0.1, 0.1),
                                       name='W_emb_hid_A')
        self.b_emb_hid_A = tf.Variable(tf.zeros([dim_hidden]),
                                       name='b_emb_hid_A')

        self.lstm_fw_Q = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm_bw_Q = rnn_cell.BasicLSTMCell(dim_hidden)

        self.lstm_fw_A = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm_bw_A = rnn_cell.BasicLSTMCell(dim_hidden)

        self.W_Q_emb = tf.Variable(tf.random_uniform(
            [dim_hidden * 2, dim_embed], -0.1, 0.1),
                                   name='W_Q_emb')
        self.b_Q_emb = tf.Variable(tf.zeros([dim_embed]), name='b_Q_emb')

        self.W_A_emb = tf.Variable(tf.random_uniform(
            [dim_hidden * 2, dim_embed], -0.1, 0.1),
                                   name='W_A_emb')
        self.b_A_emb = tf.Variable(tf.zeros([dim_embed]), name='b_A_emb')
Example #8
    def initialize_model(self):
        self.keep_prob = tf.placeholder(tf.float32)
        sigma = 1e-3

        #embeddings = tf.Variable(tf.convert_to_tensor(wv, dtype=tf.float32), name="Embedding")

        self.x = tf.placeholder(tf.float32, shape=(self.batch_size, wv_dim, self.num_steps))
        self.y = tf.placeholder(tf.int32, shape=(self.batch_size, self.num_steps))
        self.loan_amounts = tf.placeholder(tf.float32, shape=(self.batch_size, self.num_steps))

        if self.num_steps > 1:
            inputs = [tf.squeeze(x_) for x_ in tf.split(2, self.num_steps, self.x)]
            loans = tf.split(1, self.num_steps, self.loan_amounts)
        else:
            inputs = [self.x[:,:,0]]
            loans = [self.loan_amounts]

        filter_number_1 = 256
        filter_number_2 = 144

        cell1 = rnn_cell.BasicLSTMCell(filter_number_1, forget_bias=1.0, input_size=wv_dim)
        cell2 = rnn_cell.BasicLSTMCell(filter_number_2, forget_bias=1.0, input_size=filter_number_1)
        cell = rnn_cell.MultiRNNCell([cell1, cell2])
        self.initial_state = cell.zero_state(self.batch_size, tf.float32)
        state = self.initial_state
        self.loss = 0
        rnn_outputs = []

        for idx, batch in enumerate(inputs):
            with tf.variable_scope("RNN") as scope:
                if idx > 0:
                    scope.reuse_variables()
                wc3 = tf.get_variable("wc3", (filter_number_2 + 1, self.n_classes),
                    initializer=tf.random_normal_initializer(mean=0.0, stddev=sigma, seed=None, dtype=tf.float32))
                bc3 = tf.get_variable("bc3", (self.n_classes,),
                    initializer=tf.random_normal_initializer(mean=0.0, stddev=sigma, seed=None, dtype=tf.float32))
                output, state = cell(batch, state)
                pred = bc3 + tf.matmul(tf.concat(1, [loans[idx], output]), wc3)
                #pred = tf.matmul(output, wc3) + bc3
                rnn_outputs.append(pred)
                self.previous_state = state

        self.output = tf.argmax(rnn_outputs[-1], 1)
        for i in range(len(inputs)):
            #print rnn_outputs[i].get_shape()
            self.loss += tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(rnn_outputs[i], self.y[:,i]))

        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
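
For intuition, a NumPy sketch of the per-step sparse softmax cross-entropy the loop above accumulates (toy logits, not the model's):

import numpy as np

def sparse_softmax_xent(logits, labels):
    # logits: (batch, n_classes); labels: (batch,) int class ids
    shifted = logits - logits.max(axis=1, keepdims=True)  # numerical stability
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels]

logits = np.array([[2.0, 0.5], [0.1, 1.0]])
labels = np.array([0, 1])
print(sparse_softmax_xent(logits, labels).sum())  # total loss over the batch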
Example #9
    def __init__(self, vocab_size, batch_size, sequence_length, embedding_size, num_classes):
        self.hyperParam = {}
        self.hyperParam["hidden_num"] = 20
        self.hyperParam["l2_lambda"] = 3
        self.hyperParam["dropout_keep_prob"] = 0.5
        l2_loss = tf.constant(0.0)

        self.dropout_keep_prob = 0.5
        ##rnnCell = rnn_cell.BasicRNNCell(hidden_num)
        rnnCell = rnn_cell.BasicLSTMCell(self.hyperParam["hidden_num"], forget_bias=1.0)
        self.input_data = tf.placeholder(tf.int32, shape=[None, sequence_length], name="input_data")
        self.weights = tf.placeholder(tf.int32, shape=[None, sequence_length], name="weights")
        self.output_data = tf.placeholder(tf.int32, [None, sequence_length], name="output_data")
        a = tf.shape(self.output_data)[0]

        #self.inputs = []
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, embedding_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            #for i, v in enumerate(input_refine):
            #    self.inputs.append(tf.nn.embedding_lookup(embedding, input_refine[i]))
        self.inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, sequence_length, inputs)]
        self.output, self.states = rnn.rnn(rnnCell, self.inputs, dtype=tf.float32)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = [tf.nn.dropout(p, self.hyperParam["dropout_keep_prob"]) for p in self.output]

        predictions = []
        with tf.name_scope("result"):
            W = tf.Variable(tf.truncated_normal([self.hyperParam["hidden_num"], num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            #output = tf.reshape(tf.concat(1, self.output), [-1, hidden_num])
            output = tf.reshape(tf.concat(1, self.h_drop), [-1, self.hyperParam["hidden_num"]])
            logits = tf.matmul(output, W) + b
            self.scores = logits
            #self.new_scores = [tf.squeeze(k, [1]) for k in tf.split(1, sequence_length, tf.reshape(logits, [-1, sequence_length, num_classes]))]

        losses = 0
        accuracy = []
        with tf.name_scope("loss"):
            output_refine = tf.reshape(self.output_data, [-1])
            #output_refine = tf.split(1, sequece_length, self.output_data)
            #weigth = tf.ones_like(output_refine, dtype="float32")
            weight = tf.reshape(tf.cast(self.weights, "float32"), [-1])
            loss = seq2seq.sequence_loss_by_example([self.scores], [output_refine], [weight], num_classes)
            self.loss = tf.reduce_sum(loss)/tf.cast(a, "float32") + self.hyperParam["l2_lambda"]*l2_loss
            #self.accuracy = tf.reduce_mean(tf.cast(tf.concat(0, accuracy), "float"))

        with tf.name_scope("accurcy"):
            self.predictions = tf.argmax(tf.reshape(self.scores, [-1, sequece_length, num_classes]), 2)
            #self.kk = tf.cast(tf.equal(self.predictions, tf.cast(self.output_data, "int64")), "int64")
            aa = tf.expand_dims(tf.reshape(tf.cast(tf.equal(self.predictions, tf.cast(self.output_data, "int64")), "float32"), [-1]), 0)
            bb = tf.expand_dims(tf.cast(tf.reshape(self.weights, [-1]), "float32"), 0)
            self.kk = tf.squeeze(tf.matmul(aa, bb, transpose_b=True))/tf.reduce_sum(tf.cast(self.weights, "float32"), [0,1])
             
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.predictions, tf.cast(self.output_data, "int64")), "float32"), name="accrucy")
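
The aa/bb matmul above is just a weighted dot product; a NumPy sketch (toy values) of the same masked-accuracy computation:

import numpy as np

predictions = np.array([[1, 2, 0], [0, 0, 3]])
targets     = np.array([[1, 2, 3], [0, 1, 3]])
weights     = np.array([[1, 1, 0], [1, 0, 1]])  # 0 masks out padding

correct = (predictions == targets).astype(np.float32).reshape(-1)
w = weights.astype(np.float32).reshape(-1)
weighted_accuracy = correct.dot(w) / w.sum()    # same value as self.kk above
print(weighted_accuracy)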
Example #10
def RNN(x, input_size, num_hidden):
    weights = {
        'hidden':
        tf.Variable(tf.random_normal([input_size,
                                      num_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([num_hidden, 1]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([num_hidden])),
        'out': tf.Variable(tf.random_normal([1]))
    }

    X_t = tf.transpose(x, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    X_r = tf.reshape(X_t, [-1, input_size])  # (n_steps*batch_size, n_input)

    X_m = tf.matmul(X_r, weights['hidden']) + biases['hidden']

    X_s = tf.split(0, seq_len, X_m)  # n_steps * (batch_size, n_hidden)

    lstm_cell = rnn_cell.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Note: outputs is a list of length seq_len; each element is a tensor
    # of size [batch_size, num_hidden]
    outputs, states = rnn.rnn(lstm_cell, X_s, dtype=tf.float32)

    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example #11
    def create_model(self):

        self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="input_data")
        self.target_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="target_data")

        # define hyper_parameters
        self.keep_prob = tf.Variable(0.3, trainable=False, name='keep_prob')
        self.lr = tf.Variable(0.0, trainable=False, name="lr")

        softmax_weights = tf.get_variable("softmax_weights", [self.rnn_size, self.vocab_size])
        softmax_biases = tf.get_variable("softmax_biases", [self.vocab_size])

        lstm_cell = rnn_cell.BasicLSTMCell(self.rnn_size)

#        if self.is_training and self.keep_prob < 1:
#              lstm_cell = rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=self.keep_prob)

        multilayer_cell = rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
        self.initial_state = multilayer_cell.zero_state(self.batch_size, tf.float32)

        with tf.device("/cpu:0"):
            # define the embedding matrix for the whole vocabulary
            self.embedding = tf.get_variable("embeddings", [self.vocab_size, self.rnn_size])
            # take the vector representation for each word in the embeddings
            embeds = tf.nn.embedding_lookup(self.embedding, self.input_data)
        
        if self.is_training and self.keep_prob < 1:
            embeds = tf.nn.dropout(embeds, self.keep_prob)
        
        
        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_weights, softmax_biases)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        # convert input to a list of seq_length
        inputs = tf.split(1, self.seq_length, embeds)

        # after splitting, each piece has shape (batch_size, 1, rnn_size);
        # squeeze it down to (batch_size, rnn_size)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        output, states = seq2seq.rnn_decoder(inputs, self.initial_state, multilayer_cell, loop_function=loop if self.infer else None, scope='rnnlm')

        output = tf.reshape(tf.concat(1, output), [-1, self.rnn_size])

        self.logits = tf.nn.xw_plus_b(output, softmax_weights, softmax_biases)
        self.probs = tf.nn.softmax(self.logits, name="probability")

        loss = seq2seq.sequence_loss_by_example([self.logits], [tf.reshape(self.target_data, [-1])], [tf.ones([self.batch_size * self.seq_length])], self.vocab_size)
        self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)

        self.final_state = states[-1]

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.grad_clip)

        optimizer = tf.train.AdamOptimizer(0.01)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
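
The `loop` function above implements greedy feedback decoding: the argmax of one step's logits becomes the next step's input. A pure-Python sketch of that idea, with hypothetical step_fn/embed_fn standing in for the cell and the embedding lookup:

def greedy_decode(step_fn, embed_fn, first_input, state, n_steps):
    inputs, outputs = first_input, []
    for _ in range(n_steps):
        logits, state = step_fn(inputs, state)   # one RNN step
        symbol = max(range(len(logits)), key=logits.__getitem__)  # argmax
        outputs.append(symbol)
        inputs = embed_fn(symbol)                # fed back as next input
    return outputs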
Example #12
    def __init__(self, config):
        lstm_cell = rnn_cell.BasicLSTMCell(config.n_hidden, forget_bias=0.0)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
        self._train_op = tf.no_op()
        self._input_data = tf.placeholder(tf.float32,
                                          [config.batch_size, config.n_hidden])
        _X = tf.matmul(self._input_data,
                       tf.get_variable("weights_out", [
                           config.n_hidden, 1
                       ])) + tf.get_variable("bias_hidden", [config.n_hidden])
        self._targets = tf.placeholder(tf.int32, [config.batch_size])
        self._initial_state = cell.zero_state(config.batch_size, tf.float32)
        state = self._initial_state

        outputs, states = rnn.rnn(cell,
                                  tf.split(0, 1, _X),
                                  initial_state=state)
        pred = tf.matmul(
            outputs[-1],
            tf.get_variable("weights_hidden",
                            [config.n_features, config.n_hidden
                             ])) + tf.get_variable("weights_out", [1])

        self._final_state = states[-1]
        self._cost = cost = tf.reduce_mean(tf.square(pred - self._targets))
        #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
        if not config.is_training:
            return

        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=config.learning_rate).minimize(cost)
        self._train_op = optimizer
Example #13
    def __init__(self,
                 n_input=None,
                 n_steps=None,
                 n_output=None,
                 n_char=None,
                 n_train_batch=1,
                 n_validation=1,
                 n_test=None):
        # model hyperparametes
        self.n_input = n_input
        self.n_steps = n_steps
        self.n_output = n_output
        self.n_char = n_char

        # model parameters
        self.embeddings = tf.Variable(
            tf.random_uniform([n_char, n_input], -1.0, 1.0))
        self.lstm_cell = rnn_cell.BasicLSTMCell(n_input, forget_bias=1.0)
        self.weights = {
            'out': tf.Variable(tf.random_normal([n_input, n_output]))
        }
        self.biases = {'out': tf.Variable(tf.random_normal([n_output]))}

        # train, validation and test models
        self.model_train = Model(n_train_batch, self.weights, self.biases,
                                 self.lstm_cell)
        self.model_validation = Model(n_validation, self.weights, self.biases,
                                      self.lstm_cell)
        self.model_test = Model(n_test, self.weights, self.biases,
                                self.lstm_cell)
Example #14
def build_lstm_inner(H, lstm_input):
    '''
    build lstm decoder
    '''
    lstm_cell = rnn_cell.BasicLSTMCell(H['lstm_size'],
                                       forget_bias=0.0,
                                       state_is_tuple=False)
    if H['num_lstm_layers'] > 1:
        lstm = rnn_cell.MultiRNNCell([lstm_cell] * H['num_lstm_layers'],
                                     state_is_tuple=False)
    else:
        lstm = lstm_cell

    batch_size = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.variable_scope('RNN',
                           initializer=tf.random_uniform_initializer(
                               -0.1, 0.1)):
        for time_step in range(H['rnn_len']):
            if time_step > 0: tf.get_variable_scope().reuse_variables()
            output, state = lstm(lstm_input, state)
            outputs.append(output)
    return outputs
Example #15
def predict_next_frame(H, lstm_input):
    lstm_cell = rnn_cell.BasicLSTMCell(832,
                                       forget_bias=0.0,
                                       state_is_tuple=False)
    if H['num_lstm_layers'] > 1:
        lstm = rnn_cell.MultiRNNCell([lstm_cell] * H['num_lstm_layers'],
                                     state_is_tuple=False)
    else:
        lstm = lstm_cell

    batch_size = H['batch_size'] * H['grid_height'] * H['grid_width']
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.variable_scope('RNN',
                           initializer=tf.random_uniform_initializer(
                               -0.1, 0.1)):
        for i in range(9):
            if i > 0: tf.get_variable_scope().reuse_variables()
            input_data = tf.reshape(lstm_input[8 - i], [300, 832])
            output, state = lstm(input_data, state)
            output = tf.reshape(output, [1, 15, 20, 832])
            outputs.append(output)

    return outputs
def RNN(x, weights, biases, init_state):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])  #(n_steps , batch_size,  n_input)
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    # This input shape is required by `rnn` function
    x = tf.split(0, n_steps, x)
    '''
    Personally I find the three lines above the hardest to understand; for a
    concrete reshape demo see basic_op in 1_Introduction. The result is that
    the corresponding row of every image in the batch is stacked into one
    matrix, which is exactly the [batch_size, cell.input_size] format the
    cell expects; the actual iteration logic lives inside the rnn.rnn function.
    '''

    # Define a lstm cell with tensorflow
    lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.rnn(lstm_cell,
                              x,
                              initial_state=init_state,
                              dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1],
                     weights['out']) + biases['out'], lstm_cell.state_size
Example #17
        def _shared_layer(input_data, config):
            """Build the model to decoding

            Args:
                input_data = size batch_size X num_steps X embedding size

            Returns:
                output units
            """
            cell = rnn_cell.BasicLSTMCell(config.encoder_size)

            inputs = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, config.num_steps, input_data)
            ]

            if is_training and config.keep_prob < 1:
                cell = rnn_cell.DropoutWrapper(
                    cell, output_keep_prob=config.keep_prob)

            cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

            initial_state = cell.zero_state(config.batch_size, tf.float32)

            encoder_outputs, encoder_states = rnn.rnn(
                cell, inputs, initial_state=initial_state, scope="encoder_rnn")

            return encoder_outputs, initial_state
Example #18
    def __init__(self,
                 vocab_size,
                 size=256,
                 depth=2,
                 learning_rate=1e-4,
                 batch_size=32,
                 keep_prob=0.1,
                 num_steps=100,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters for an Deep Bidirectional LSTM model.
    
    Args:
      vocab_size: int, The dimensionality of the input vocab
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      num_steps: int, The max time unit [100]
    """
        super(DeepBiLSTM, self).__init__()

        self.vocab_size = int(vocab_size)
        self.size = int(size)
        self.depth = int(depth)
        self.learning_rate = float(learning_rate)
        self.batch_size = int(batch_size)
        self.keep_prob = float(keep_prob)
        self.num_steps = int(num_steps)

        self.inputs = tf.placeholder(tf.int32,
                                     [self.batch_size, self.num_steps])
        self.input_lengths = tf.placeholder(tf.int64, [self.batch_size])

        with tf.device("/cpu:0"):
            self.emb = tf.Variable(tf.truncated_normal(
                [self.vocab_size, self.size], -0.1, 0.1),
                                   name='emb')
            self.embed_inputs = tf.nn.embedding_lookup(
                self.emb, tf.transpose(self.inputs))

        self.cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if not forward_only and self.keep_prob < 1:
            self.cell = rnn_cell.DropoutWrapper(self.cell,
                                                output_keep_prob=keep_prob)
        self.stacked_cell = rnn_cell.MultiRNNCell([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(
            batch_size, tf.float32)

        self.outputs, self.states = rnn.rnn(self.stacked_cell,
                                            tf.unpack(self.embed_inputs),
                                            dtype=tf.float32,
                                            sequence_length=self.input_lengths,
                                            initial_state=self.initial_state)

        output = tf.reduce_sum(tf.pack(self.outputs), 0)
    def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, bias_init_vector=None):

        self.dim_image = np.int(dim_image)
        self.dim_embed = np.int(dim_embed)
        self.dim_hidden = np.int(dim_hidden)
        self.batch_size = np.int(batch_size)
        self.n_lstm_steps = np.int(n_lstm_steps)
        self.n_words = np.int(n_words)

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')

        self.bemb = self.init_bias(dim_embed, name='bemb')

        self.lstm = rnn_cell.BasicLSTMCell(dim_hidden)

        #self.encode_img_W = self.init_weight(dim_image, dim_hidden, name='encode_img_W')
        self.encode_img_W = tf.Variable(tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
        self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

        self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')

        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
Example #20
    def testEmbeddingTiedRNNSeq2Seq(self):
        with self.test_session() as sess:
            with tf.variable_scope("root",
                                   initializer=tf.constant_initializer(0.5)):
                enc_inp = [
                    tf.constant(1, tf.int32, shape=[2]) for i in xrange(2)
                ]
                dec_inp = [
                    tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)
                ]
                cell = rnn_cell.BasicLSTMCell(2)
                dec, mem = seq2seq.embedding_tied_rnn_seq2seq(
                    enc_inp, dec_inp, cell, 5)
                sess.run([tf.variables.initialize_all_variables()])
                res = sess.run(dec)
                self.assertEqual(len(res), 3)
                self.assertEqual(res[0].shape, (2, 5))

                res = sess.run(mem)
                self.assertEqual(len(res), 4)
                self.assertEqual(res[0].shape, (2, 4))

                # Test externally provided output projection.
                w = tf.get_variable("proj_w", [2, 5])
                b = tf.get_variable("proj_b", [5])
                with tf.variable_scope("proj_seq2seq"):
                    dec, _ = seq2seq.embedding_tied_rnn_seq2seq(
                        enc_inp, dec_inp, cell, 5, output_projection=(w, b))
                sess.run([tf.variables.initialize_all_variables()])
                res = sess.run(dec)
                self.assertEqual(len(res), 3)
                self.assertEqual(res[0].shape, (2, 2))

                # Test that previous-feeding model ignores inputs after the first.
                dec_inp2 = [
                    tf.constant(0, tf.int32, shape=[2]) for _ in xrange(3)
                ]
                tf.get_variable_scope().reuse_variables()
                d1, _ = seq2seq.embedding_tied_rnn_seq2seq(enc_inp,
                                                           dec_inp,
                                                           cell,
                                                           5,
                                                           feed_previous=True)
                d2, _ = seq2seq.embedding_tied_rnn_seq2seq(enc_inp,
                                                           dec_inp2,
                                                           cell,
                                                           5,
                                                           feed_previous=True)
                d3, _ = seq2seq.embedding_tied_rnn_seq2seq(
                    enc_inp,
                    dec_inp2,
                    cell,
                    5,
                    feed_previous=tf.constant(True))
                res1 = sess.run(d1)
                res2 = sess.run(d2)
                res3 = sess.run(d3)
                self.assertAllClose(res1, res2)
                self.assertAllClose(res1, res3)
Example #21
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        size = config.n_hidden
        num_steps = config.num_steps
        self._input_data = tf.placeholder(tf.float32,
                                          (batch_size, config.num_steps))
        self._targets = tf.placeholder(tf.float32, [batch_size, 1])
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=2.8)
        # lstm_cell = rnn_cell.LSTMCell(size, 1)
        # cell = lstm_cell
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)
        self._train_op = tf.no_op()
        self._result = -1

        weights_hidden = tf.get_variable(
            "weights_hidden", [config.num_features, config.n_hidden])
        inputs = []
        for k in range(num_steps):
            nextitem = tf.matmul(
                tf.reshape(self._input_data[:, k],
                           [config.batch_size, config.num_features]),
                weights_hidden)
            inputs.append(nextitem)

        outputs, states = rnn.rnn(cell,
                                  inputs,
                                  initial_state=self._initial_state)
        #output = tf.reshape(tf.concat(1, outputs), [-1, config.n_hidden])

        #pred = tf.matmul(outputs[-1], tf.get_variable("weights_out", [config.n_hidden,1])) + tf.get_variable("bias_out", [1])

        output = tf.reshape(tf.concat(1, outputs[-1]), [-1, size])
        #pred = tf.matmul(output, tf.get_variable("weights_out", [config.n_hidden,1])) + tf.get_variable("bias_out", [1])
        pred = tf.sigmoid(
            tf.matmul(outputs[-1],
                      tf.get_variable("weights_out", [config.n_hidden, 1])) +
            tf.get_variable("bias_out", [1]))
        self._pred = pred

        self._final_state = states[-1]
        self._cost = cost = tf.square((pred[:, 0] - self._targets[:, 0]))
        self._result = tf.abs(pred[0, 0] - self._targets[0, 0])

        # self._cost = cost = tf.abs(pred[0, 0] - self.targets[0,0])

        if not config.is_training:
            return

        #optimizer = tf.train.GradientDescentOptimizer(learning_rate = config.learning_rate).minimize(cost)
        optimizer = tf.train.AdamOptimizer().minimize(cost)
        self._train_op = optimizer
        print("top ", self._train_op)
Example #22
    def __init__(self,
                 rnn_size,
                 num_layers,
                 vocab_size,
                 grad_clip,
                 batch_size=1,
                 seq_length=1):

        cell = rnn_cell.BasicLSTMCell(rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * num_layers)

        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable('softmax_w', [rnn_size, vocab_size])
            softmax_b = tf.get_variable('softmax_b', [vocab_size])
            with tf.device('/cpu:0'):
                embedding = tf.get_variable('embedding',
                                            [vocab_size, rnn_size])
                inputs = tf.split(
                    1, seq_length,
                    tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # feed the previous output back in only when sampling one step at a time
        infer = batch_size == 1 and seq_length == 1
        loop_fn = loop if infer else None

        outputs, last_state = seq2seq.rnn_decoder(inputs,
                                                  self.initial_state,
                                                  cell,
                                                  loop_function=loop_fn,
                                                  scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * seq_length])], vocab_size)
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
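
At inference time, self.probs above is usually sampled rather than argmaxed. A NumPy sketch (the session call that produces probs is assumed); temperature sharpens or flattens the distribution before sampling:

import numpy as np

def sample_next(probs_row, temperature=1.0):
    # probs_row: 1-D distribution over the vocabulary, e.g. probs[0]
    logits = np.log(probs_row + 1e-12) / temperature
    p = np.exp(logits - logits.max())
    p /= p.sum()
    return np.random.choice(len(p), p=p)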
Example #23
    def BiLSTMgraph(self, _X, _C, _T, _istate_fw, _istate_bw, _weights,
                    _biases):
        # input: a [len_sent, len_seq] matrix (e.g. 7x5)
        # transform into embeddings
        if _T is not None:
            emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X)
            emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C)
            emb_t = tf.nn.embedding_lookup(self._weights['t_emb'], _T)
            # Linear activation
            _X = tf.matmul(emb_x, self._weights['hidden_w']) + tf.matmul(
                emb_c, self._weights['hidden_c']) + tf.matmul(
                    emb_t,
                    self._weights['hidden_t']) + self._biases['hidden_b']
        else:
            emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X)
            emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C)
            # Linear activation
            _X = tf.matmul(emb_x, self._weights['hidden_w']) + tf.matmul(
                emb_c, self._weights['hidden_c']) + self._biases['hidden_b']

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0)
        lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell,
                                                     output_keep_prob=0.5)
        # Backward direction cell
        lstm_bw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0)
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell,
                                                     output_keep_prob=0.5)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(0, self.sent_max_len, _X)

        # Get lstm cell output
        outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                        lstm_bw_cell,
                                        _X,
                                        initial_state_fw=self.istate_fw,
                                        initial_state_bw=self.istate_bw,
                                        sequence_length=self.seq_len)

        return outputs
Example #24
def lstm_model(_weights, _biases, _Wemb, _config):

    _image = tf.placeholder(tf.float32,
                            [_config.batch_size, _config.dim_image])
    _sentence = tf.placeholder(tf.int32,
                               [_config.batch_size, _config.maxlen + 2])
    _mask = tf.placeholder(tf.float32,
                           [_config.batch_size, _config.maxlen + 2])

    lstm = rnn_cell.BasicLSTMCell(_config.dim_hidden)
    image_emb = tf.matmul(_image, _weights['encoding_img_W']) + _biases[
        'encoding_img_b']  # (batch_size, dim_hidden)

    state = tf.zeros([_config.batch_size, lstm.state_size])

    _loss = 0.0

    with tf.variable_scope("RNN"):
        for i in range(_config.maxlen + 2):
            if i == 0:
                current_emb = image_emb
            else:
                with tf.device("/cpu:0"):
                    current_emb = tf.nn.embedding_lookup(
                        _Wemb, _sentence[:, i - 1]) + _biases['bemb']

            if i > 0: tf.get_variable_scope().reuse_variables()

            output, state = lstm(current_emb,
                                 state)  # (batch_size, dim_hidden)

            if i > 0:
                labels = tf.expand_dims(_sentence[:, i], 1)  # (batch_size)
                indices = tf.expand_dims(tf.range(0, _config.batch_size, 1), 1)
                concated = tf.concat(1, [indices, labels])
                onehot_labels = tf.sparse_to_dense(
                    concated, tf.pack([_config.batch_size, _config.n_words]),
                    1.0, 0.0)  # (batch_size, n_words)

                logit_words = tf.matmul(
                    output, _weights['embed_word_W']) + _biases[
                        'embed_word_b']  # (batch_size, n_words)
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                    logit_words, onehot_labels)
                cross_entropy = cross_entropy * _mask[:,
                                                      i]  #tf.expand_dims(mask, 1)

                current_loss = tf.reduce_sum(cross_entropy)
                _loss = _loss + current_loss

        _loss = _loss / tf.reduce_sum(_mask[:, 1:])

        return _loss, _image, _sentence, _mask
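
The sparse_to_dense call above builds one-hot rows by pairing each row index with its label and scattering 1.0 there; the same construction in NumPy (toy values):

import numpy as np

labels = np.array([3, 0, 2])            # word ids, shape (batch_size,)
n_words = 5
onehot = np.zeros((len(labels), n_words), dtype=np.float32)
onehot[np.arange(len(labels)), labels] = 1.0
print(onehot)                           # (batch_size, n_words), one 1.0 per row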
    def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps, bias_init_vector = None):
        self.dim_image = dim_image
        self.n_words = n_words
        self.dim_hidden = dim_hidden
        self.batch_size = batch_size
        self.n_lstm_steps = n_lstm_steps

        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1), name = 'Wemb')

        self.lstm1 = rnn_cell.BasicLSTMCell(dim_hidden)
        self.lstm2 = rnn_cell.BasicLSTMCell(dim_hidden)

        self.encode_image_W = tf.Variable(tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name = 'encode_image_W')
        self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]), name = 'encode_image_b')

        self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name = 'embed_word_W')
        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name = 'embed_word_b')
        else:
            self.embed_word_b = tf.Variable(tf.zeros([n_words]), name = 'embed_word_b')
    def __init__(self, config, is_training):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size

        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        if is_training and config.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(cell,
                                           output_keep_prob=config.keep_prob)

        self.cell = cell

        self.input_data = tf.placeholder(dtype=tf.float32,
                                         shape=[None, num_steps, 1])
        self.target_data = tf.placeholder(dtype=tf.float32,
                                          shape=[None, num_steps, 1])
        self.initial_state = cell.zero_state(batch_size=config.batch_size,
                                             dtype=tf.float32)

        inputs = tf.split(1, num_steps, self.input_data)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        with tf.variable_scope('rnnvm'):
            output_w = tf.get_variable("output_w", [size, 1])
            output_b = tf.get_variable("output_b", [1])

        outputs, states = seq2seq.rnn_decoder(inputs,
                                              self.initial_state,
                                              cell,
                                              scope='rnnvm')

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        output = tf.nn.xw_plus_b(output, output_w, output_b)

        entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            output,
            tf.reshape(self.target_data, shape=[num_steps * batch_size, 1]))

        self.cost = cost = tf.reduce_mean(entropy)
        self.final_state = states[-1]

        if not is_training:
            return

        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #27
def build_lstm_inner(lstm_input, H):
    lstm_size = H['arch']['lstm_size']
    lstm = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=0.0)
    batch_size = H['arch']['batch_size'] * H['arch']['grid_height'] * H['arch']['grid_width']
    state = tf.zeros([batch_size, lstm.state_size])

    outputs = []
    with tf.variable_scope('RNN'):
        for time_step in range(H['arch']['rnn_len']):
            if time_step > 0: tf.get_variable_scope().reuse_variables()
            output, state = lstm(lstm_input, state)
            outputs.append(output)
    return outputs
Example #28
def rnn_model(X, init_state, lstm_size, slicing_tensors):
    # X, input shape: (batch_size, input_vec_size, time_step_size)
    # print "X shape", X.get_shape().as_list()
    XT = tf.transpose(X, [1, 0, 2])  # permute time_step_size and batch_size
    # XT shape: (input_vec_size, batch_size, time_step_size)
    # print "XT shape", XT.get_shape().as_list()

    XR = tf.reshape(
        XT,
        [-1, lstm_size])  # each row has input for each lstm cell (lstm_size)
    # XR shape: (input vec_size, batch_size)
    # print sess.run(num_steps)
    # print "XR shape", XR.get_shape().as_list()

    X_split = tf.split(0, n_lstm_steps,
                       XR)  # split them to time_step_size (28 arrays)
    # Each array shape: (batch_size, input_vec_size)
    # print "X_split"
    # print len(X_split)
    # print X_split

    # Make lstm with lstm_size (each input vector size)
    lstm = rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0)

    # Get lstm cell output, time_step_size (28) arrays with lstm_size output: (batch_size, lstm_size)
    outputs, _states = rnn.rnn(lstm, X_split, initial_state=init_state)
    # print  "outputs", outputs[0].get_shape()
    outputs = tf.reshape(tf.concat(0, outputs),
                         [n_lstm_steps, batch_size, dim_hidden])
    # Linear activation is NOT REQUIRED!!
    # Get the last output.
    # print "outputs"
    # print len(outputs)
    # print outputs

    # Slicing the appropriate output vectors from the <outputs>
    # sliced_outputs = [tf.slice(outputs[break_points[i]-1], slicing_lengths[i][0], slicing_lengths[i][1]) for i in range(batch_size)]
    slicing_tensors = [
        tf.squeeze(tsr) for tsr in tf.split(0, batch_size, slicing_tensors)
    ]
    # print  "slicing_tensors", slicing_tensors[0].get_shape()
    sliced_outputs = [
        tf.slice(outputs, begin=tensor, size=[1, 1, dim_hidden])
        for tensor in slicing_tensors
    ]
    # for begin,size in slicing_lengths:
    # print tf.slice(outputs, begin, size)

    # return outputs[-1], lstm.state_size # State size to initialize the state
    # return tf.squeeze(tf.concat(0, sliced_outputs)), lstm.state_size
    return sliced_outputs, lstm.state_size
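
The tf.slice calls above each extract a single (time_step, batch_index) vector from the stacked outputs; a NumPy sketch of that begin/size semantics (toy sizes):

import numpy as np

n_lstm_steps, batch_size, dim_hidden = 4, 3, 2
outputs = np.arange(n_lstm_steps * batch_size * dim_hidden).reshape(
    n_lstm_steps, batch_size, dim_hidden)
begin = (2, 1, 0)                       # step 2, batch element 1
sliced = outputs[begin[0]:begin[0] + 1, begin[1]:begin[1] + 1, :]
print(sliced.shape)                     # (1, 1, dim_hidden), like size=[1, 1, dim_hidden]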
Example #29
        def _chunk_private(encoder_units, pos_prediction, config):
            """Decode model for chunks

            Args:
                encoder_units - these are the encoder units:
                [batch_size X encoder_size] with the one the pos prediction
                pos_prediction:
                must be the same size as the encoder_size

            returns:
                logits
            """
            # concatenate the encoder_units and the pos_prediction

            pos_prediction = tf.reshape(
                pos_prediction, [batch_size, num_steps, pos_embedding_size])
            chunk_inputs = tf.concat(2, [pos_prediction, encoder_units])

            with tf.variable_scope("chunk_decoder"):
                cell = rnn_cell.BasicLSTMCell(config.chunk_decoder_size,
                                              forget_bias=1.0)

                if is_training and config.keep_prob < 1:
                    cell = rnn_cell.DropoutWrapper(
                        cell, output_keep_prob=config.keep_prob)

                initial_state = cell.zero_state(config.batch_size, tf.float32)

                # this function puts the 3d tensor into a 2d tensor: batch_size x input size
                inputs = [
                    tf.squeeze(input_, [1])
                    for input_ in tf.split(1, config.num_steps, chunk_inputs)
                ]

                decoder_outputs, decoder_states = rnn.rnn(
                    cell,
                    inputs,
                    initial_state=initial_state,
                    scope="chunk_rnn")

                output = tf.reshape(tf.concat(1, decoder_outputs),
                                    [-1, config.chunk_decoder_size])

                softmax_w = tf.get_variable(
                    "softmax_w",
                    [config.chunk_decoder_size, config.num_chunk_tags])
                softmax_b = tf.get_variable("softmax_b",
                                            [config.num_chunk_tags])
                logits = tf.matmul(output, softmax_w) + softmax_b

            return logits, decoder_states
Example #30
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size

        self._input_data = tf.placeholder(tf.float32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.float32, [batch_size, num_steps])

        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        iw = tf.get_variable("input_w", [1, size])
        ib = tf.get_variable("input_b", [size])
        inputs = [
            tf.nn.xw_plus_b(i_, iw, ib)
            for i_ in tf.split(1, num_steps, self._input_data)
        ]
        if is_training and config.keep_prob < 1:
            inputs = [
                tf.nn.dropout(input_, config.keep_prob) for input_ in inputs
            ]

        outputs, states = rnn.rnn(cell,
                                  inputs,
                                  initial_state=self._initial_state)
        rnn_output = tf.reshape(tf.concat(1, outputs), [-1, size])

        self._output = output = tf.nn.xw_plus_b(
            rnn_output, tf.get_variable("out_w", [size, 1]),
            tf.get_variable("out_b", [1]))

        self._cost = cost = tf.reduce_mean(
            tf.square(output - tf.reshape(self._targets, [-1])))
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        #optimizer = tf.train.GradientDescentOptimizer(self._lr)
        optimizer = tf.train.AdamOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
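
Several of the examples above call clip_by_global_norm before apply_gradients; a NumPy sketch of what it computes (my own illustration, not the TF source):

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # global_norm is the l2 norm of all gradients concatenated; every
    # gradient is scaled by clip_norm / max(global_norm, clip_norm)
    global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads], global_norm

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # global norm = 13
clipped, norm = clip_by_global_norm(grads, 5.0)
print(norm, [g.tolist() for g in clipped])         # clipped norm is now 5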