def BiRNN(x, weights, biases):

    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                              dtype=tf.float32)
    except Exception: # Old TensorFlow version only returns outputs not states
        outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                        dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
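The function above leaves n_input, n_steps, n_hidden and the weight/bias dictionaries as free variables. A minimal sketch of the surrounding graph they imply is shown below; the import path, hyperparameter values and placeholder names are assumptions for illustration, not part of the original example.

import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell  # legacy TF 0.x-style imports (assumed)

n_input = 28     # assumed: features per timestep
n_steps = 28     # assumed: timesteps per sequence
n_hidden = 128   # assumed: LSTM units per direction
n_classes = 10   # assumed: number of output classes

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# The output projection is 2*n_hidden wide because bidirectional_rnn concatenates
# the forward and backward outputs at every timestep.
weights = {'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

pred = BiRNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))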
Example no. 2
def BiRNN(x, weights, biases):

    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell,
                                              lstm_bw_cell,
                                              x,
                                              dtype=tf.float32)
    except Exception:  # Old TensorFlow version only returns outputs not states
        outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                        lstm_bw_cell,
                                        x,
                                        dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
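These examples target the pre-1.0 TensorFlow API (tf.split(axis, num, value), tf.concat(axis, values), rnn.bidirectional_rnn). For reference, a hedged sketch of the same data preparation under the TF 1.x argument order follows; it assumes a placeholder x of shape (batch_size, n_steps, n_input) and the same n_steps, n_input, n_hidden names as above.

# Sketch only: TF 1.x moved the axis argument and renamed the op to
# static_bidirectional_rnn (tf.contrib.rnn.static_bidirectional_rnn in early 1.x).
x = tf.transpose(x, [1, 0, 2])
x = tf.reshape(x, [-1, n_input])
x = tf.split(x, n_steps, 0)  # value first, then the number of splits, then the axis

lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                               dtype=tf.float32)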
def blstm_layer(_X, _x_length, batch_s):
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, hidden_weights) + hidden_biases

    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden_layer, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden_layer, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, max_input_timesteps, _X)  # n_steps * (batch_size, n_hidden)

    istate_fw = lstm_fw_cell.zero_state(batch_s, tf.float32)
    istate_bw = lstm_bw_cell.zero_state(batch_s, tf.float32)

    # Get lstm cell output
    outputs, output_state_fw, output_state_bw = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                                                      initial_state_fw=istate_fw,
                                                                      initial_state_bw=istate_bw,
                                                                      sequence_length=_x_length
                                                                      )

    outputs = tf.concat(0, outputs)
    activation = tf.matmul(outputs, output_weights) + output_biases

    return tf.reshape(activation, [max_input_timesteps, batch_s, n_output_classes])
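blstm_layer relies on several module-level names (hidden_weights, hidden_biases, output_weights, output_biases, n_input, n_hidden_layer, max_input_timesteps, n_output_classes). A possible definition consistent with the shapes used above is sketched here, assuming the same imports as before; the concrete values are illustrative assumptions, not the original project's settings.

n_input = 39                # assumed feature dimension per frame
n_hidden_layer = 128        # assumed hidden units per direction
max_input_timesteps = 100   # assumed padded sequence length
n_output_classes = 61       # assumed number of classes

hidden_weights = tf.Variable(tf.random_normal([n_input, n_hidden_layer]))
hidden_biases = tf.Variable(tf.zeros([n_hidden_layer]))
# 2*n_hidden_layer because forward and backward outputs are concatenated
output_weights = tf.Variable(tf.random_normal([2 * n_hidden_layer, n_output_classes]))
output_biases = tf.Variable(tf.zeros([n_output_classes]))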
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                    lstm_bw_cell,
                                    _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw)
    # Linear activation
    # Get inner loop last output
    output = [tf.matmul(o, _weights['out']) + _biases['out'] for o in outputs]
    return output
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases):

     # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                            initial_state_fw=_istate_fw,
                                            initial_state_bw=_istate_bw)

    # Linear activation
    # Get inner loop last output
    output = [tf.matmul(o, _weights['out']) + _biases['out'] for o in outputs]
    return output
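The two variants above return one logit tensor per timestep rather than only the last one. A hedged sketch of how they might be wired up and the per-step outputs stacked back into a single tensor follows, assuming the usual n_input, n_steps, n_hidden and n_classes hyperparameters are defined (as in the sketch after the first example); tf.pack was later renamed tf.stack.

weights = {'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])),
           'out': tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))}
biases = {'hidden': tf.Variable(tf.random_normal([n_hidden])),
          'out': tf.Variable(tf.random_normal([n_classes]))}

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
# BasicLSTMCell (pre state_is_tuple) packs c and h into one 2*n_hidden vector
istate_fw = tf.placeholder(tf.float32, [None, 2 * n_hidden])
istate_bw = tf.placeholder(tf.float32, [None, 2 * n_hidden])

logits_list = BiRNN(x, istate_fw, istate_bw, weights, biases)  # n_steps tensors
logits = tf.transpose(tf.pack(logits_list), [1, 0, 2])         # (batch, n_steps, n_classes)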
Example no. 6
    def __init__(self, config):
        sent_len = config.sent_len
        batch_size = config.batch_size
        vocab_size = config.vocab_size
        embed_size = config.embed_size
        num_layers = config.num_layers
        state_size = config.state_size
        keep_prob = config.keep_prob

        self.input_data = tf.placeholder(tf.int32, [batch_size, sent_len])
        self.lengths = tf.placeholder(tf.int64, [batch_size])
        self.targets = tf.placeholder(tf.float32, [batch_size, 1])

        # Get embedding layer which requires CPU
        with tf.device("/cpu:0"):
            embeding = tf.get_variable("embeding", [vocab_size, embed_size])
            inputs = tf.nn.embedding_lookup(embeding, self.input_data)

        # LSTM 1 -> Encode the characters of every token into a fixed dense representation
        with tf.variable_scope("rnn1", reuse=None):
            cell = rnn_cell.LSTMCell(state_size, input_size=embed_size, initializer=tf.contrib.layers.xavier_initializer())
            back_cell = rnn_cell.LSTMCell(state_size, input_size=embed_size, initializer=tf.contrib.layers.xavier_initializer())
            cell = rnn_cell.DropoutWrapper(
              cell, input_keep_prob=keep_prob,
                         output_keep_prob=keep_prob)
            back_cell = rnn_cell.DropoutWrapper(
              back_cell, input_keep_prob=keep_prob,
                              output_keep_prob=keep_prob) 
            cell = rnn_cell.MultiRNNCell([cell] * num_layers)
            backcell = rnn_cell.MultiRNNCell([back_cell] * num_layers)
            
            rnn_splits = [tf.squeeze(input_, [1]) for input_ in tf.split(1, sent_len, inputs)]

            # Run the bidirectional rnn
            outputs, last_fw_state, last_bw_state = rnn.bidirectional_rnn(
                                                        cell, backcell, rnn_splits,
                                                        sequence_length=self.lengths,
                                                        dtype=tf.float32)
        
        sent_out = tf.concat(1, [last_fw_state, last_bw_state])
        #sent_out = outputs[-1]
        #sent_out = tf.add_n(outputs)
        output_size = state_size*4

        with tf.variable_scope("linear", reuse=None):
            w = tf.get_variable("w", [output_size, 1])
            b = tf.get_variable("b", [1], initializer=tf.constant_initializer(0.0))
            raw_logits = tf.matmul(sent_out, w) + b 
        self.probabilities = tf.sigmoid(raw_logits)
        self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

        #Calculate gradients and propagate
        #Aggregation method 2 is really important for rnn per the tensorflow issues list
        tvars = tf.trainable_variables()
        self.lr = tf.Variable(0.0, trainable=False) #Assign to overwrite
        optimizer = tf.train.AdamOptimizer()
        grads, _vars = zip(*optimizer.compute_gradients(self.cost, tvars, aggregation_method=2))
        grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                      config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(grads, _vars))
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size,
          _seq_len):

    # BiRNN requires sequence_length to be supplied as a [batch_size] int64 tensor
    # Note: Tensorflow 0.6.0 requires the BiRNN sequence_length parameter to be set
    # For a better implementation with a recent version of TensorFlow, see below
    _seq_len = tf.fill([_batch_size], tf.constant(_seq_len, dtype=tf.int64))

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X)  # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                    lstm_bw_cell,
                                    _X,
                                    initial_state_fw=_istate_fw,
                                    initial_state_bw=_istate_bw,
                                    sequence_length=_seq_len)

    # Linear activation
    # Get inner loop last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
Example no. 8
    def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):

        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
        # Reshape to prepare input to hidden activation
        # (n_steps*batch_size, n_input)
        _X = tf.reshape(_X, [-1, self.config.num_input])
        # Linear activation
        _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

        # Forward direction cell
        rnn_fw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)
        # Backward direction cell
        rnn_bw_cell = rnn_cell.BasicLSTMCell(self.config.num_hidden)

        # Split data because rnn cell needs a list of inputs for the RNN inner
        # loop
        # n_steps * (batch_size, n_hidden)
        _X = tf.split(0, self.config.num_steps, _X)

        # Get lstm cell output
        outputs, final_fw, final_bw = rnn.bidirectional_rnn(
            rnn_fw_cell,
            rnn_bw_cell,
            _X,
            initial_state_fw=_istate_fw,
            initial_state_bw=_istate_bw)
        # Linear activation
        return [
            tf.matmul(output, _weights['out']) + _biases['out']
            for output in outputs
        ], final_fw, final_bw
    def BiRNN(self, _X, _istate_fw, _istate_bw, _weights, _biases):

        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
        # Reshape to prepare input to hidden activation
        # (n_steps*batch_size, n_input)
        _X = tf.reshape(_X, [-1, self.config.num_input])
        # Linear activation
        _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

        # Forward direction cell
        single_fw_cell = BasicRNNCellReLu(self.config.num_hidden)
        single_fw_cell = rnn_cell.DropoutWrapper(single_fw_cell, self.config.input_keep_prob, self.config.output_keep_prob, 0.8)
        rnn_fw_cell = rnn_cell.MultiRNNCell(
            [single_fw_cell]*self.config.model_depth)
        # Backward direction cell
        single_bw_cell = BasicRNNCellReLu(self.config.num_hidden)
        single_bw_cell = rnn_cell.DropoutWrapper(single_bw_cell, self.config.input_keep_prob, self.config.output_keep_prob)
        rnn_bw_cell = rnn_cell.MultiRNNCell(
            [single_bw_cell]*self.config.model_depth)

        # Split data because rnn cell needs a list of inputs for the RNN inner
        # loop
        # n_steps * (batch_size, n_hidden)
        _X = tf.split(0, self.config.num_steps, _X)

        # Get lstm cell output
        outputs, final_fw, final_bw = rnn.bidirectional_rnn(
                                        rnn_fw_cell, rnn_bw_cell, _X,
                                        initial_state_fw=_istate_fw,
                                        initial_state_bw=_istate_bw)
        # Linear activation
        return [tf.matmul(output, _weights['out']) + _biases['out']
                for output in outputs], final_fw, final_bw
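BasicRNNCellReLu is not a stock TensorFlow cell; it is presumably a small custom cell with a ReLU activation. A minimal sketch of such a cell against the legacy rnn_cell.RNNCell interface is given below; the original project's implementation may differ.

import tensorflow as tf
from tensorflow.python.ops import rnn_cell  # legacy import, as in the snippets above


class BasicRNNCellReLu(rnn_cell.RNNCell):
    """Plain RNN cell with a ReLU activation (sketch only)."""

    def __init__(self, num_units):
        self._num_units = num_units

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            in_size = inputs.get_shape()[1].value
            w = tf.get_variable("w", [in_size + self._num_units, self._num_units])
            b = tf.get_variable("b", [self._num_units],
                                initializer=tf.constant_initializer(0.0))
            # new_state = relu(W [inputs; state] + b); output equals the state
            output = tf.nn.relu(tf.matmul(tf.concat(1, [inputs, state]), w) + b)
        return output, output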
Example no. 10
 def rnn_estimator(X, y):
     """RNN estimator with target predictor function on top."""
     X = input_op_fn(X)
     if cell_type == 'rnn':
         cell_fn = rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = rnn_cell.BasicLSTMCell
     else:
         raise ValueError(
             "cell_type {} is not supported. ".format(cell_type))
     if bidirection:
         # forward direction cell
         rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] *
                                             num_layers)
         # backward direction cell
         rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] *
                                             num_layers)
         # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
         encoding = rnn.bidirectional_rnn(rnn_fw_cell,
                                          rnn_bw_cell,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)
     else:
         cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         _, encoding = rnn.rnn(cell,
                               X,
                               dtype=tf.float32,
                               sequence_length=sequence_length,
                               initial_state=initial_state)
     return target_predictor_fn(encoding[-1], y)
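Note that the bidirectional branch above never passes the inputs X to rnn.bidirectional_rnn and uses an initial_state keyword the function does not accept, which is why the pylint suppressions are needed. A call that matches the legacy signature would presumably look like the sketch below; whether this is what the original author intended is an assumption.

# Hedged sketch: pass the split inputs and let the cells build zero initial states.
outputs, state_fw, state_bw = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell, X,
                                                    dtype=tf.float32,
                                                    sequence_length=sequence_length)
encoding = outputs  # encoding[-1] is then the last timestep's concatenated output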
def BiRNN(x, weights, biases):

    # Prepare data shape to match `bidirectional_rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps*batch_size, n_input)
    x = tf.reshape(x, [-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_hidden)
    x = tf.split(0, n_steps, x)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)

    # Get lstm cell output
    '''
    The key point is here: the two lstm cells created above are identical; all of the
    forward/backward logic is handled inside the bidirectional_rnn function, so you do
    not have to track which one is the forward pass and which is the backward pass.
    '''
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                    dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):

    # BiRNN requires sequence_length to be supplied as a [batch_size] int64 tensor
    # Note: Tensorflow 0.6.0 requires the BiRNN sequence_length parameter to be set
    # For a better implementation with a recent version of TensorFlow, see below
    _seq_len = tf.fill([_batch_size], tf.constant(_seq_len, dtype=tf.int64))

    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input]) # (n_steps*batch_size, n_input)
    # Linear activation
    _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(0, n_steps, _X) # n_steps * (batch_size, n_hidden)

    # Get lstm cell output
    outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
                                            initial_state_fw=_istate_fw,
                                            initial_state_bw=_istate_bw,
                                            sequence_length=_seq_len)

    # Linear activation
    # Get inner loop last output
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
Example no. 13
 def rnn_estimator(X, y):
     """RNN estimator with target predictor function on top."""
     X = input_op_fn(X)
     if cell_type == 'rnn':
         cell_fn = rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = rnn_cell.BasicLSTMCell
     else:
         raise ValueError("cell_type {} is not supported. ".format(cell_type))
     if bidirection:
         # forward direction cell
         rnn_fw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         # backward direction cell
         rnn_bw_cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
         encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell,
                                          sequence_length=sequence_length,
                                          initial_state=initial_state)
     else:
         cell = rnn_cell.MultiRNNCell([cell_fn(rnn_size)] * num_layers)
         _, encoding = rnn.rnn(cell, X, dtype=tf.float32,
                               sequence_length=sequence_length,
                               initial_state=initial_state)
     return target_predictor_fn(encoding[-1], y)
Example no. 14
def bidirectional_lstm(inputs,keep_prob,INPUT_SIZE,HIDDEN_SIZE,SEQ_LENGTH):
    initializer = tf.random_uniform_initializer(-0.01,0.01)
    cell_F = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    cell_B = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    inputs_ = [tf.nn.dropout(each,keep_prob) for each in inputs]
    outputs = bidirectional_rnn(cell_F, cell_B, inputs_, initial_state_fw=None, initial_state_bw=None, sequence_length=None,dtype=tf.float32)
    return outputs
Example no. 15
File: dcnn.py Project: AntNLP/opie
    def BiLSTM(self, x, n_steps, n_input, seq_len):
        x =  tf.transpose(x, [1, 0, 2])
        x = tf.reshape(x, [-1, n_input])
        x = tf.split(0, n_steps, x)
        lstm_fw_cell = rnn_cell.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
        lstm_bw_cell = rnn_cell.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
        outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                            dtype=tf.float32, sequence_length=seq_len)

        outputs = tf.pack(outputs)
        outputs = tf.transpose(outputs, [1, 0, 2])
        return self.last_relevant(outputs, seq_len)
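self.last_relevant is referenced but not shown in this snippet. The usual implementation of that pattern selects each sequence's last valid (non-padded) output by gathering rows from the flattened batch; the sketch below shows that idiom and may or may not match the original project's helper.

    def last_relevant(self, outputs, seq_len):
        # outputs: (batch_size, max_steps, features); seq_len: (batch_size,) true lengths
        batch_size = tf.shape(outputs)[0]
        max_steps = tf.shape(outputs)[1]
        out_size = int(outputs.get_shape()[2])
        # row index of each example's last valid timestep in the flattened tensor
        index = tf.range(0, batch_size) * max_steps + (tf.cast(seq_len, tf.int32) - 1)
        flat = tf.reshape(outputs, [-1, out_size])
        return tf.gather(flat, index)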
Example no. 16
def bidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH):
    initializer = tf.random_uniform_initializer(-0.01, 0.01)
    cell_F = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    cell_B = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=initializer)
    inputs_ = [tf.nn.dropout(each, keep_prob) for each in inputs]
    outputs = bidirectional_rnn(cell_F,
                                cell_B,
                                inputs_,
                                initial_state_fw=None,
                                initial_state_bw=None,
                                sequence_length=None,
                                dtype=tf.float32)
    return outputs
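A hedged usage sketch for bidirectional_lstm: the function expects inputs to already be a Python list of SEQ_LENGTH tensors of shape (batch_size, INPUT_SIZE), so the caller typically splits a time-major placeholder first. All names and sizes below are illustrative assumptions.

SEQ_LENGTH, INPUT_SIZE, HIDDEN_SIZE = 50, 300, 128               # assumed sizes
x = tf.placeholder(tf.float32, [SEQ_LENGTH, None, INPUT_SIZE])   # time-major input
keep_prob = tf.placeholder(tf.float32)

inputs = [tf.squeeze(t, [0]) for t in tf.split(0, SEQ_LENGTH, x)]
outputs = bidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH)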
Example no. 17
    def __init__(self, conf):
        self.conf = conf

        cell_fw = BasicLSTMCell(self.conf.rnn_size)
        cell_bw = BasicLSTMCell(self.conf.rnn_size)
        
        if conf.keep_prob < 1.0 and not conf.infer:
            cell_fw = DropoutWrapper(cell_fw, output_keep_prob=conf.keep_prob)
            cell_bw = DropoutWrapper(cell_bw, output_keep_prob=conf.keep_prob)
        self.cell_fw = cell_fw = MultiRNNCell([cell_fw] * self.conf.num_layers)
        self.cell_bw = cell_bw = MultiRNNCell([cell_bw] * self.conf.num_layers)
        
        self.input_data = tf.placeholder(tf.int32, [self.conf.batch_size, self.conf.seq_length])
        self.targets = tf.placeholder(tf.int32, [self.conf.batch_size, self.conf.seq_length])
    
        self.initial_state_fw = cell_fw.zero_state(self.conf.batch_size, tf.float32)
        
        self.initial_state_bw = cell_bw.zero_state(self.conf.batch_size, tf.float32)
        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable("softmax_w", [self.conf.rnn_size*2, self.conf.output_size])
            softmax_b = tf.get_variable("softmax_b", [self.conf.output_size])
        
        embedding = tf.get_variable("embedding", [self.conf.nerloader.vocab_size, self.conf.rnn_size])
        _inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if conf.keep_prob < 1.0 and not conf.infer:
            _inputs = tf.nn.dropout(_inputs,conf.keep_prob)
        inputs = tf.split(1, conf.seq_length, _inputs)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
            
        outputs_bi = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=self.initial_state_fw, initial_state_bw=self.initial_state_bw, scope='rnn')
        output = tf.reshape(tf.concat(1, outputs_bi), [-1, self.conf.rnn_size*2])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)

        self.loss_weights = [tf.ones([self.conf.batch_size * self.conf.seq_length])]

        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                self.loss_weights)
        self.cost = (tf.reduce_sum(loss) / self.conf.batch_size / self.conf.seq_length)
        tf.scalar_summary("loss",self.cost)
        self.out = output
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                self.conf.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_summary_op = tf.merge_all_summaries()
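A minimal training-loop sketch for the model above; its class name is not shown here, so model stands in for an instance and batch_x/batch_y for one batch of data. All of these names and the learning-rate value are assumptions.

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())        # legacy global initializer
    sess.run(tf.assign(model.lr, 0.001))           # lr is assigned, not fed
    feed = {model.input_data: batch_x, model.targets: batch_y}
    _, loss = sess.run([model.train_op, model.cost], feed_dict=feed)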
Example no. 18
        def BiRNN(inputs, _seq_length):

            # input shape: (batch_size, seq_width, embedding_size) ==> (seq_width, batch_size, embedding_size)
            inputs = tf.transpose(inputs, [1, 0, 2])
            # Reshape before feeding to hidden activation layers
            inputs = tf.reshape(inputs, [-1, embedding_size])
            # Hidden activation
            #inputs = tf.nn.relu(tf.matmul(inputs, weights['hidden']) + biases['hidden'])
            # Split the inputs to make a list of inputs for the rnn
            inputs = tf.split(0, seq_width,
                              inputs)  # seq_width * (batch_size, n_hidden)

            initializer = tf.random_uniform_initializer(-1, 1)

            with tf.variable_scope('forward'):
                #fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
                #lstm1 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #lstm2 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #fw_cell = rnn_cell.MultiRNNCell([lstm1, lstm2])
                fw_cell = rnn_cell.LSTMCell(n_hidden,
                                            embedding_size,
                                            initializer=initializer)
            with tf.variable_scope('backward'):
                #bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
                #lstm3 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #lstm4 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #bw_cell = rnn_cell.MultiRNNCell([lstm3, lstm4])
                bw_cell = rnn_cell.LSTMCell(n_hidden,
                                            embedding_size,
                                            initializer=initializer)

            # Get lstm cell output
            outputs, _, _ = rnn.bidirectional_rnn(fw_cell,
                                                  bw_cell,
                                                  inputs,
                                                  dtype="float32",
                                                  sequence_length=_seq_length)
            outputs_tensor = tf.reshape(tf.concat(0, outputs),
                                        [-1, 2 * n_hidden])

            logits = []

            for i in xrange(len(outputs)):
                final_transformed_val = tf.matmul(
                    outputs[i], weights['out']) + biases['out']
                logits.append(final_transformed_val)
            logits = tf.reshape(tf.concat(0, logits), [-1, n_classes])

            return logits, outputs_tensor
Example no. 19
    def prediction(self):
        fw_cell = rnn_cell.LSTMCell(self._num_hidden)
        fw_cell = rnn_cell.DropoutWrapper(fw_cell, output_keep_prob=self.dropout)
        bw_cell = rnn_cell.LSTMCell(self._num_hidden)
        bw_cell = rnn_cell.DropoutWrapper(bw_cell, output_keep_prob=self.dropout)

        if self._num_layers > 1:
            fw_cell = rnn_cell.MultiRNNCell([fw_cell] * self._num_layers)
            bw_cell = rnn_cell.MultiRNNCell([bw_cell] * self._num_layers)

        output, _, _ = rnn.bidirectional_rnn(fw_cell, bw_cell, tf.unpack(tf.transpose(self.data, perm=[1, 0, 2])), dtype=tf.float32, sequence_length=self.length)
        max_length = int(self.target.get_shape()[1])
        num_classes = int(self.target.get_shape()[2])
        weight, bias = self._weight_and_bias(2*self._num_hidden, num_classes)
        output = tf.reshape(tf.transpose(tf.pack(output), perm=[1, 0, 2]), [-1, 2*self._num_hidden])
        prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
        prediction = tf.reshape(prediction, [-1, max_length, num_classes])
        return prediction
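self._weight_and_bias is referenced above but not shown. A typical helper with that name builds a truncated-normal weight matrix and a small constant bias for the output layer; the sketch below is an assumption about its shape, not the original code.

    @staticmethod
    def _weight_and_bias(in_size, out_size):
        # in_size is 2*num_hidden here because both directions are concatenated
        weight = tf.truncated_normal([in_size, out_size], stddev=0.01)
        bias = tf.constant(0.1, shape=[out_size])
        return tf.Variable(weight), tf.Variable(bias)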
Example no. 21
    def build_model (self, is_training=True):
        batch_size = self.batch_size
        n_steps = self.num_steps
        size = self.n_hidden
        config = self.config


        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=1.0)

        
        # add dropout to output
        if is_training and config.keep_prob < 1:
          lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
              lstm_cell, output_keep_prob=config.keep_prob)
        

        #cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
        cell_fw = cell_bw = lstm_cell

        initial_state = lstm_cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
          embedding = tf.get_variable("embedding", [self.vocab_size, size])
          inputs = tf.nn.embedding_lookup(embedding, self._inputs)

        
        if is_training and config.keep_prob < 1:
          inputs = tf.nn.dropout(inputs, config.keep_prob)
        

        # Build RNN
        inputs = [tf.squeeze(input_, [1])   for input_ in tf.split(1, n_steps, inputs)]
        #outputs, state = rnn.rnn(cell, inputs, initial_state=initial_state)
        outputs_pair, state_fw, state_bw = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs, \
                      initial_state_fw=initial_state, initial_state_bw=initial_state)
        #              dtype=None, sequence_length=None, scope=None):

        outputs = []
        for out in outputs_pair:
            out_fw, out_bw = tf.split(1, 2, out)
            outputs.append (1.0 * out_fw + 0.0 * out_bw)
        #outputs_fw = [ [0] for out in outputs_pair]
        self.outputs = outputs
        return self.outputs
Example no. 22
    def BiLSTMgraph(self, _X, _C, _T, _istate_fw, _istate_bw, _weights,
                    _biases):
        # input: a [len_sent,len_seq] (e.g. 7x5)
        # transform into embeddings
        if _T:
            emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X)
            emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C)
            emb_t = tf.nn.embedding_lookup(self._weights['t_emb'], _T)
            # Linear activation
            _X = tf.matmul(emb_x, self._weights['hidden_w']) + tf.matmul(
                emb_c, self._weights['hidden_c']) + tf.matmul(
                    emb_t,
                    self._weights['hidden_t']) + self._biases['hidden_b']
        else:
            emb_x = tf.nn.embedding_lookup(self._weights['w_emb'], _X)
            emb_c = tf.nn.embedding_lookup(self._weights['c_emb'], _C)
            # Linear activation
            _X = tf.matmul(emb_x, self._weights['hidden_w']) + tf.matmul(
                emb_c, self._weights['hidden_c']) + self._biases['hidden_b']

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0)
        lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell,
                                                     output_keep_prob=0.5)
        # Backward direction cell
        lstm_bw_cell = rnn_cell.BasicLSTMCell(self.num_hidden, forget_bias=1.0)
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell,
                                                     output_keep_prob=0.5)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(0, self.sent_max_len, _X)

        # Get lstm cell output
        outputs = rnn.bidirectional_rnn(lstm_fw_cell,
                                        lstm_bw_cell,
                                        _X,
                                        initial_state_fw=self.istate_fw,
                                        initial_state_bw=self.istate_bw,
                                        sequence_length=self.seq_len)

        return outputs
Example no. 23
 def rnn_estimator(X, y):
     """RNN estimator with target predictor function on top."""
     X = input_op_fn(X)
     if cell_type == 'rnn':
         cell_fn = rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = rnn_cell.BasicLSTMCell
     else:
         raise ValueError("cell_type {} is not supported. ".format(cell_type))
     if bidirection:
         # forward direction cell
         rnn_fw_cell = cell_fn(rnn_size)
         # backward direction cell
         rnn_bw_cell = cell_fn(rnn_size)
         encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell)
     else:
         cell = cell_fn(rnn_size)
         _, encoding = rnn.rnn(cell, X, dtype=tf.float32)
     return target_predictor_fn(encoding[-1], y)
Example no. 24
 def rnn_estimator(X, y):
     """RNN estimator with target predictor function on top."""
     X = input_op_fn(X)
     if cell_type == 'rnn':
         cell_fn = rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = rnn_cell.BasicLSTMCell
     else:
         raise ValueError(
             "cell_type {} is not supported. ".format(cell_type))
     if bidirection:
         # forward direction cell
         rnn_fw_cell = cell_fn(rnn_size)
         # backward direction cell
         rnn_bw_cell = cell_fn(rnn_size)
         encoding = rnn.bidirectional_rnn(rnn_fw_cell, rnn_bw_cell)
     else:
         cell = cell_fn(rnn_size)
         _, encoding = rnn.rnn(cell, X, dtype=tf.float32)
     return target_predictor_fn(encoding[-1], y)
    def BiRNN (inputs, _seq_length):

        # input shape: (batch_size, seq_width, embedding_size) ==> (seq_width, batch_size, embedding_size)
        inputs = tf.transpose(inputs, [1, 0, 2])
        # Reshape before feeding to hidden activation layers
        inputs = tf.reshape(inputs, [-1, embedding_size])
        # Hidden activation
        #inputs = tf.nn.relu(tf.matmul(inputs, weights['hidden']) + biases['hidden'])
        # Split the inputs to make a list of inputs for the rnn
        inputs = tf.split(0, seq_width, inputs) # seq_width * (batch_size, n_hidden)

        initializer = tf.random_uniform_initializer(-1,1)

        with tf.variable_scope('forward'):
            #fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
            fw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
        with tf.variable_scope('backward'):
            #bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
            bw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)

        # Get lstm cell output
        outputs,_,_ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype="float32", sequence_length=_seq_length)
        outputs_tensor = tf.reshape(tf.concat(0, outputs),[-1, 2*n_hidden])

        logits = []

        for i in xrange(len(outputs)):
            final_transformed_val = tf.matmul(outputs[i],weights['out']) + biases['out']
            '''
            # TODO replace with zeroes where sentences are shorter and biases should not be calculated
            for length in tf_train_seq_length:
                tf.shape()
                if length <= i:
                    final_transformed_val[tf_train_seq_length.index(length)] = empty_pos
            '''
            logits.append(final_transformed_val)
        logits = tf.reshape(tf.concat(0, logits), [-1, n_classes])

        return logits, outputs_tensor
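One possible way to address the TODO in the loop above (zeroing the logits at padded positions) with the legacy API is to build a {0,1} mask from the per-example lengths and multiply it into the flattened logits. The sketch below reuses the same free variables (seq_width, _seq_length) and is an assumption, not the original fix.

# Sketch: the mask shape follows the step-major flattening used by tf.concat(0, logits)
step_ids = tf.reshape(tf.range(0, seq_width), [-1, 1])                    # (seq_width, 1)
valid = tf.less(step_ids, tf.reshape(tf.cast(_seq_length, tf.int32), [1, -1]))
mask = tf.reshape(tf.cast(valid, tf.float32), [-1, 1])                    # (seq_width*batch, 1)
masked_logits = logits * mask                                             # zero padded steps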
        def BiRNN (inputs, _seq_length):

            # input shape: (batch_size, seq_width, embedding_size) ==> (seq_width, batch_size, embedding_size)
            inputs = tf.transpose(inputs, [1, 0, 2])
            # Reshape before feeding to hidden activation layers
            inputs = tf.reshape(inputs, [-1, embedding_size])
            # Hidden activation
            #inputs = tf.nn.relu(tf.matmul(inputs, weights['hidden']) + biases['hidden'])
            # Split the inputs to make a list of inputs for the rnn
            inputs = tf.split(0, seq_width, inputs) # seq_width * (batch_size, n_hidden)

            initializer = tf.random_uniform_initializer(-1,1)

            with tf.variable_scope('forward'):
                #fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
                #lstm1 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #lstm2 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #fw_cell = rnn_cell.MultiRNNCell([lstm1, lstm2])
                fw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
            with tf.variable_scope('backward'):
                #bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
                #lstm3 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #lstm4 = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)
                #bw_cell = rnn_cell.MultiRNNCell([lstm3, lstm4])
                bw_cell = rnn_cell.LSTMCell(n_hidden, embedding_size, initializer=initializer)

            # Get lstm cell output
            outputs,_,_ = rnn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype="float32", sequence_length=_seq_length)
            outputs_tensor = tf.reshape(tf.concat(0, outputs),[-1, 2*n_hidden])

            logits = []

            for i in xrange(len(outputs)):
                final_transformed_val = tf.matmul(outputs[i],weights['out']) + biases['out']
                logits.append(final_transformed_val)
            logits = tf.reshape(tf.concat(0, logits), [-1, n_classes])

            return logits, outputs_tensor
Example no. 27
    def BiLSTM(_X, _C, _T, _istate_fw, _istate_bw, _weights, _biases):
        # input: a [len_sent,len_seq] (e.g. 7x5)
        # transform into embeddings
        with tf.device("/cpu:0"):
            emb_x = tf.nn.embedding_lookup(_weights['w_emb'],_X)
            emb_t = tf.nn.embedding_lookup(_weights['t_emb'],_T)
            emb_c = tf.nn.embedding_lookup(_weights['c_emb'],_C)

        # Linear activation
        _X = tf.matmul(emb_x, _weights['hidden_w']) + tf.matmul(emb_c, _weights['hidden_c']) + tf.matmul(emb_t,_weights['hidden_t']) + _biases['hidden_b']

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(0,max_sent_len,_X)

        # Get lstm cell output
        outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,initial_state_fw = _istate_fw, initial_state_bw=_istate_bw,sequence_length = seq_len)

        return outputs
Example no. 28
    def BiRNN(self, scope):
        # input shape: (batch_size, step_size, input_dim)
        # we need to permute step_size and batch_size (i.e., swap the step and batch dimensions)
        data = tf.transpose(self.input_data, [1, 0, 2])

        # Reshape to prepare input to hidden activation
        # (step_size*batch_size, n_input), flattens the batch and step
        # after the above transformation, data is now (step_size*batch_size, input_dim)
        data = tf.reshape(data, [-1, self.config.input_dim + 1])

        # Define lstm cells with tensorflow
        with tf.variable_scope(str(scope)):
            # Linear activation
            data = tf.matmul(data,
                             self.weights['hidden']) + self.biases['hidden']
            data = tf.nn.dropout(data, self.config.dropout)
            # Define a cell
            if self.config.cell_type == 'GRU':
                lstm_fw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
                lstm_bw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
            else:
                lstm_fw_cell = rnn_cell.LSTMCell(
                    self.config.hidden_dim,
                    forget_bias=self.config.forget_bias,
                    use_peepholes=self.config.use_peepholes,
                    cell_clip=self.config.cell_clip)
                lstm_bw_cell = rnn_cell.LSTMCell(
                    self.config.hidden_dim,
                    forget_bias=self.config.forget_bias,
                    use_peepholes=self.config.use_peepholes,
                    cell_clip=self.config.cell_clip)
            # Split data because rnn cell needs a list of inputs for the RNN inner loop
            data = tf.split(0, self.config.step_size,
                            data)  # step_size * (batch_size, hidden_dim)
            # Get lstm cell output
            print 'running single stack Bi-directional RNN.......'
            outputs = rnn.bidirectional_rnn(
                lstm_fw_cell,
                lstm_bw_cell,
                data,
                initial_state_fw=self.init_state_fw,
                initial_state_bw=self.init_state_bw,
                scope="RNN1")
            # for basic rnn prediction we're really just interested in the last state's output, so we average them in this case
            total_outputs = tf.div(tf.add_n([outputs[2], outputs[1]]), 2.0)
            return [
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out1']) +
                    self.biases['out1'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out2']) +
                    self.biases['out2'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out3']) +
                    self.biases['out3'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out4']) +
                    self.biases['out4'], self.config.dropout),
                tf.nn.dropout(
                    tf.matmul(total_outputs, self.weights['out5']) +
                    self.biases['out5'], self.config.dropout),
            ]
Example no. 29
    def __init__(self, num_chars, num_classes, num_steps=100, num_epochs=100, model_path='models/', \
        embedding_matrix=None, emb_dim=100,emb_trainable=False, is_training=True, is_crf=True, weight=False, l2_reg_lambda=0.2):
        # Parameter
        self.max_f1 = 0
        self.learning_rate = 0.002
        self.dropout_rate = 0.5
        self.batch_size = 128
        self.num_layers = 1   
        self.hidden_dim = 100
        self.num_epochs = num_epochs
        self.num_steps = num_steps
        self.num_chars = num_chars
        #self.num_classes = num_classes
        self.num_classes = 2
        self.model_path = model_path
 
        self.char2id, self.id2char = helper.loadMap(os.path.join(model_path, "char2id"))
        self.label2id, self.id2label = helper.loadMap(os.path.join(model_path, "label2id"))
        self.evaluate_labels = set() 
        for l in self.label2id.keys():
            if l[:2] in ['B-', 'I-', 'E-', 'S-']:
                self.evaluate_labels.add(l[2:]) 
            elif l not in ['OTHER','<PAD>']:
                self.evaluate_labels.add(l) 
        self.evaluate_labels = list(self.evaluate_labels)

        self.emb_dim = emb_dim
        
        # placeholder of x, y and weight
        #self.inputs = tf.placeholder(tf.int32, [None, self.num_steps, 2])
        self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
        self.pairs = tf.placeholder(tf.int32, [None, self.num_steps])
        self.targets = tf.placeholder(tf.float32, [None, self.num_classes])
        self.pair_indices = tf.placeholder(tf.int32, [None]) 
        self.pair_segment_ids = tf.placeholder(tf.int32, [None])  
        
        # char embedding
        if embedding_matrix is not None:
            self.embedding = tf.Variable(embedding_matrix, trainable=emb_trainable, name="emb", dtype=tf.float32)
        else:
            self.embedding = tf.get_variable("emb", [self.num_chars, self.emb_dim])
    
        self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)               # shape: [batch_size, num_steps, emb_dim]
        self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2])                  # shape: [num_steps, batch_size, emb_dim]
        self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim])           # shape: [(num_steps * batch_size), emb_dim]
        self.inputs_emb = tf.split(0, self.num_steps, self.inputs_emb)              # num_steps tensor,[batch_size, emb_dim]

        # lstm cell
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)

        # dropout
        if is_training:
            lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate))
            lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate))

        lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers)
        lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers)

        # get the length of each sample, shape [batch_size]
        self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1)
        self.length = tf.cast(self.length, tf.int32)  
        
        # forward and backward
        # outputs: total num_steps tensors, each tensor's shape: [batch_size, hidden_dim * 2]
        self.outputs, _, _ = rnn.bidirectional_rnn(
            lstm_cell_fw, 
            lstm_cell_bw,
            self.inputs_emb, 
            dtype=tf.float32,
            sequence_length=self.length
        )

        # softmax
        self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, self.hidden_dim * 2]) #  shape: [batch_size*num_steps, hidden_dim*2]
        self.softmax_w = tf.get_variable("softmax_w", [self.hidden_dim * 2, self.num_classes])
        self.softmax_b = tf.get_variable("softmax_b", [self.num_classes])
        self.outputs_emb = tf.reshape(self.outputs, [-1, num_steps, self.hidden_dim*2])       # [batch_size, num_steps, hidden_dim*2]

        self.attention_w = tf.get_variable("attention_w", [self.hidden_dim*2, 1])
        self.attentions = tf.reshape(tf.matmul(self.outputs, self.attention_w), [-1, num_steps])  # [batch_size, num_steps]
        self.attentions = tf.nn.softmax(self.attentions) 
        self.attentions = tf.reshape(self.attentions, [self.batch_size, 1, num_steps]) #[batch_size, 1, num_steps]

        self.outputs_emb = tf.batch_matmul(self.attentions, self.outputs_emb)         #[batch_size, 1, hidden_dim*2]
        self.outputs_emb = tf.tanh(self.outputs_emb)
        self.outputs = tf.reshape(self.outputs_emb, [-1, self.hidden_dim*2] )             #[batch_size, hidden_dim*2]

        self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b        #[batch_size, num_classes] 
        self.predictions = tf.argmax(self.logits, 1, name="predictions")
        
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.targets, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
        
        l2_loss = tf.constant(0.0)
        l2_loss += tf.nn.l2_loss(self.softmax_w)
        l2_loss += tf.nn.l2_loss(self.softmax_b)
        losses = tf.nn.softmax_cross_entropy_with_logits(self.logits, self.targets)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
     
        # summary
        self.train_summary = tf.scalar_summary("loss", self.loss)
        self.val_summary = tf.scalar_summary("loss", self.loss)        
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 
Example no. 30
	def __init__(self, vocab_size, embedding_size, learning_rate, 
		learning_rate_decay_op, memory_hops, dropout_rate, 
		q_depth, a_depth, episodic_m_depth, ep_depth,
		m_input_size, attention_ff_l1_size, max_gradient_norm, maximum_story_length=100,
		maximum_question_length=20, use_lstm=False, forward_only=False):

		# initialization
		self.vocab_size = vocab_size
		self.embedding_size = embedding_size
		self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
		self.learning_rate_decay_op = tf.Variable(float(learning_rate_decay_op), trainable=False)
		self.dropout_rate = dropout_rate
		self.global_step = tf.Variable(0, trainable=False, name='global_step')
		self.q_depth = q_depth	# question RNN depth
		self.a_depth = a_depth	# answer RNN depth
		self.m_depth = episodic_m_depth # memory cell depth
		self.ep_depth = ep_depth	# episodic depth
		self.max_gradient_norm = max_gradient_norm
		self.memory_hops = memory_hops	# number of episodic memory pass
		self.m_input_size = m_input_size
		self.m_size = embedding_size # memory cell size
		self.a_size = embedding_size # answer RNN size


		self.attention_ff_l1_size = attention_ff_l1_size
		# attention_ff_l2_size 

		
		
		print("[*] Creating Dynamic Memory Network ...")
		# question module
		def seq2seq_fq(encoder_inputs, cell, mask=None):
			return seq2seq.sentence_embedding_rnn_q(
				encoder_inputs, self.vocab_size, cell, self.embedding_size, mask)
		def seq2seq_fs(encoder_inputs, cell, mask=None):
			return seq2seq.sentence_embedding_rnn_s(
				encoder_inputs, self.vocab_size, cell, self.embedding_size, mask)
		# attention gate in episodic
		# TODO: force gate logits to be sparse, add L1 norm regularization





		# Sentence token placeholder
		self.story = []
		for i in range(maximum_story_length):
			self.story.append(tf.placeholder(tf.int32, shape=[None], 
												name="story{0}".format(i)))
		self.story_mask = tf.placeholder(tf.int32, shape=[None], name="story_mask")
		self.story_len = tf.placeholder(tf.int32, shape=[], name="story_length")
		print (self.story_len)
		self.question = []
		for i in range(maximum_question_length):
			self.question.append(tf.placeholder(tf.int32, shape=[None], name="question{0}".format(i)))
		self.answer = tf.placeholder(tf.int64, name="answer")

		# self.story_len = 1#= tf.reshape(tf.shape(self.story_mask), [])
		# TODO: fixed lens problem
		#self.story_len = 5

		# configuration of attention gate

		# print (self.story)
		


		with tf.variable_scope("answer"):
			softmax_weights = tf.Variable(tf.truncated_normal([self.a_size, self.vocab_size], -0.1, 0.1), name="softmax_weights")
			softmax_biases = tf.Variable(tf.zeros([self.vocab_size]), name="softmax_biases")
		
		answer_weights = tf.Variable(tf.truncated_normal([self.m_size, self.a_size], -0.1, 0.1), name="answer_weights")
		answer_biases = tf.Variable(tf.zeros([self.a_size]), name="answer_biases")

		#------------ question module ------------
		single_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
		if use_lstm:
			single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)
		if not forward_only and dropout_rate < 1:
			single_cell = tf.nn.rnn_cell.DropoutWrapper(
				single_cell, output_keep_prob=dropout_rate)
		question_cell = single_cell
		if q_depth > 1:
			question_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell]*q_depth)
		question = seq2seq_fq(self.question, question_cell)
		self.question_state = question[0]
		#for e in question:


		#------------ Input module ------------
		reader_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
		if use_lstm:
			reader_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)
		if not forward_only and dropout_rate < 1:
			reader_cell = tf.nn.rnn_cell.DropoutWrapper(
				reader_cell, output_keep_prob=dropout_rate)
		# Embed tokens into vectors, feed them into the rnn cell, and return the cell state
		fusion_fw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
		fusion_bw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
		if use_lstm:
			fusion_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)
			fusion_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)

		if not forward_only and dropout_rate < 1:
			fusion_fw_cell = tf.nn.rnn_cell.DropoutWrapper(
				fusion_fw_cell, output_keep_prob=dropout_rate)
			fusion_bw_cell = tf.nn.rnn_cell.DropoutWrapper(
				fusion_bw_cell, output_keep_prob=dropout_rate)

		(_facts, _, _) = rnn.bidirectional_rnn(fusion_fw_cell,fusion_bw_cell,
			seq2seq_fs(self.story, reader_cell),dtype=tf.float32)

		self.facts = _facts[0]




		#------------ episodic memory module ------------
		# TODO: use self.facts to extract ep_size
		self.ep_size = 2*self.embedding_size# episodic cell size
		# construct memory cell
		#single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.m_size)
		mem_cell = cell.MemCell(self.m_size)
		#mem_cell = tf.nn.rnn_cell.GRUCell(self.m_size)
		self.episodic_array = tf.Variable(tf.random_normal([1,1]))

		# construct episodic_cell

		# for i in xrange(self.memory_hops):
		single_cell = cell.MGRUCell(self.ep_size)
		ep_cell = cell.MultiMGRUCell([single_cell] * ep_depth)

		e = []
		mem_state = self.question_state
		q_double = tf.concat(1, [self.question_state, self.question_state])
		mem_state_double = tf.concat(1, [mem_state, mem_state])

		# TODO change z_dim to be 
		z_dim = self.embedding_size * 8
		self.attention_ff_size = z_dim
		self.attention_ff_l2_size = 1 
		# self._ep_initial_state = []
		# for _cell in range(ep_cell)
		# 	self._ep_initial_state.append = _cell.zero_state(1, tf.float32)	# TODO change batch size

		# initialize parameters	
		with tf.variable_scope("episodic"):
			# parameters of attention gate
			l1_weights = tf.Variable(tf.truncated_normal([self.attention_ff_size, self.attention_ff_l1_size], -0.1, 0.1), name="l1_weights")
			l1_biases = tf.Variable(tf.zeros([self.attention_ff_l1_size]), name="l1_biases")
			l2_weights = tf.Variable(tf.truncated_normal([self.attention_ff_l1_size, self.attention_ff_l2_size], -0.1, 0.1), name="l2_weights")
			l2_biases = tf.Variable(tf.zeros([self.attention_ff_l2_size]), name="l2_biases")
			# paramters of episodic
			mem_weights = tf.Variable(tf.truncated_normal([self.m_input_size, self.m_size], -0.1, 0.1), name="mem_weights")
			mem_biases = tf.Variable(tf.zeros([self.m_size]), name="mem_biases")


		# initializing variable of feedforward nn
		seq2seq.def_feedforward_nn(self.attention_ff_size, self.attention_ff_l1_size, self.attention_ff_l2_size)

		for hops in xrange(self.memory_hops):
			# gate attention network
			step = tf.constant(0)
			tf.while_loop(lambda step, story_len, facts, q_double, mem_state_double: tf.less(step, story_len),
				lambda step, story_len, facts, q_double, mem_state_double: self.mem_body(step, story_len, facts, q_double, mem_state_double),
				[step, self.story_len, self.facts, q_double, mem_state_double])	

			#self.episodic_gate = tf.reshape(tf.nn.softmax(self.episodic_array),[1])
			self.episodic_gate = tf.nn.softmax(tf.reshape(self.episodic_array, [1,-1]))
			print ("episodic_gate",self.episodic_gate)

			# attention GRU
			# output, context = cell.rnn(ep_cell[hops], [self.facts], self.episodic_gate, scope="epsodic", dtype=tf.float32)
			output, context = cell.rnn_ep(ep_cell, [self.facts], self.episodic_gate, dtype=tf.float32, scope="episodic")
			e.append(output)
			# memory updates
			#_, mem_state = mem_cell(context_state, mem_state)	# GRU
			#_, mem_state = cell.rnn_mem(mem_cell, [context], self.question_state, mem_state, self.m_input_size, self.m_size, dtype=tf.float32)
			mem_state = mem_cell(context,  self.question_state, mem_state, self.m_input_size, self.m_size)

			# if the attentioned module is last e, it means the episodic pass is over
			if np.argmax(np.asarray(e[-1])) == len(e[-1])-1:
				break
			
			
		#------------ answer ------------
		# TODO: use decoder sequence to generate answer
		answer_steps = 1
		single_cell = tf.nn.rnn_cell.GRUCell(self.a_size)
		answer_cell = single_cell
		if a_depth > 1:
			answer_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * a_depth)
		
		a_state = mem_state
		for step in range(answer_steps):
			y = tf.nn.softmax(tf.matmul(a_state, answer_weights))
			(answer, a_state) = answer_cell(tf.concat(1, [self.question_state, y]), a_state)
			#(answer, a_state) = answer_cell(tf.concat(1, [question, mem_state]), a_state)

		self.logits = tf.nn.softmax(tf.matmul(answer, softmax_weights)+softmax_biases)
		answer = tf.reshape(tf.one_hot(self.answer, self.vocab_size, 1.0, 0.0), [1,self.vocab_size])
		self.loss = tf.reduce_mean(
			tf.nn.softmax_cross_entropy_with_logits(self.logits, answer))

		
		params = tf.trainable_variables()
		# testing
		for e in params:
			print(e.get_shape(), e.name, type(e))
		if not forward_only:
			self.gradient_norms = []
			self.updates = []
			optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
			gradients = tf.gradients(self.loss, params)
			clipped_gradients, norm = tf.clip_by_global_norm(gradients,
				self.max_gradient_norm)
			self.gradient_norms = norm
			self.updates = optimizer.apply_gradients(
				zip(clipped_gradients, params), global_step=self.global_step)
		
		self.saver = tf.train.Saver(tf.all_variables())
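
# --- Note: a minimal NumPy sketch (not from the example above) of what the
# clip-then-apply training step does: every gradient is rescaled by
# max_norm / global_norm whenever the global L2 norm exceeds max_norm,
# mirroring the semantics of tf.clip_by_global_norm before apply_gradients.
import numpy as np

def clip_by_global_norm(grads, max_norm):
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    scale = min(1.0, max_norm / (global_norm + 1e-12))
    return [g * scale for g in grads], global_norm

clipped, norm = clip_by_global_norm([np.ones(4) * 3.0, np.ones(2) * 4.0], max_norm=1.0)
print(norm, [np.linalg.norm(g) for g in clipped])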
Example 31
    def __init__(self,
                 num_chars,
                 num_classes,
                 num_steps=200,
                 num_epochs=100,
                 embedding_matrix=None,
                 is_training=True,
                 is_crf=True,
                 weight=False):
        # Parameter
        self.max_f1 = 0
        self.learning_rate = 0.002
        self.dropout_rate = 0.5
        self.batch_size = 128
        self.num_layers = 1
        self.emb_dim = 100
        self.hidden_dim = 100
        self.num_epochs = num_epochs
        self.num_steps = num_steps
        self.num_chars = num_chars
        self.num_classes = num_classes

        # placeholder of x, y and weight
        self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
        self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
        self.targets_weight = tf.placeholder(tf.float32,
                                             [None, self.num_steps])
        self.targets_transition = tf.placeholder(tf.int32, [None])

        # char embedding
        if embedding_matrix is not None:
            self.embedding = tf.Variable(embedding_matrix,
                                         trainable=False,
                                         name="emb",
                                         dtype=tf.float32)
        else:
            self.embedding = tf.get_variable("emb",
                                             [self.num_chars, self.emb_dim])
        self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)
        self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2])
        self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim])
        self.inputs_emb = tf.split(0, self.num_steps, self.inputs_emb)

        # lstm cell
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)

        # dropout
        if is_training:
            lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate))
            lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate))

        lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] *
                                                   self.num_layers)
        lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] *
                                                   self.num_layers)

        # get the length of each sample
        self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1)
        self.length = tf.cast(self.length, tf.int32)

        # forward and backward
        self.outputs, _, _ = rnn.bidirectional_rnn(lstm_cell_fw,
                                                   lstm_cell_bw,
                                                   self.inputs_emb,
                                                   dtype=tf.float32,
                                                   sequence_length=self.length)

        # softmax
        self.outputs = tf.reshape(tf.concat(1, self.outputs),
                                  [-1, self.hidden_dim * 2])
        self.softmax_w = tf.get_variable(
            "softmax_w", [self.hidden_dim * 2, self.num_classes])
        self.softmax_b = tf.get_variable("softmax_b", [self.num_classes])
        self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b

        if not is_crf:
            pass
        else:
            self.tags_scores = tf.reshape(
                self.logits,
                [self.batch_size, self.num_steps, self.num_classes])
            self.transitions = tf.get_variable(
                "transitions", [self.num_classes + 1, self.num_classes + 1])

            dummy_val = -1000
            class_pad = tf.Variable(dummy_val * np.ones(
                (self.batch_size, self.num_steps, 1)),
                                    dtype=tf.float32)
            self.observations = tf.concat(2, [self.tags_scores, class_pad])

            begin_vec = tf.Variable(np.array(
                [[dummy_val] * self.num_classes + [0]
                 for _ in range(self.batch_size)]),
                                    trainable=False,
                                    dtype=tf.float32)
            end_vec = tf.Variable(np.array([[0] +
                                            [dummy_val] * self.num_classes
                                            for _ in range(self.batch_size)]),
                                  trainable=False,
                                  dtype=tf.float32)
            begin_vec = tf.reshape(begin_vec,
                                   [self.batch_size, 1, self.num_classes + 1])
            end_vec = tf.reshape(end_vec,
                                 [self.batch_size, 1, self.num_classes + 1])

            self.observations = tf.concat(
                1, [begin_vec, self.observations, end_vec])

            self.mask = tf.cast(
                tf.reshape(tf.sign(self.targets),
                           [self.batch_size * self.num_steps]), tf.float32)

            # point score
            self.point_score = tf.gather(
                tf.reshape(self.tags_scores, [-1]),
                tf.range(0, self.batch_size * self.num_steps) *
                self.num_classes +
                tf.reshape(self.targets, [self.batch_size * self.num_steps]))
            self.point_score *= self.mask

            # transition score
            self.trans_score = tf.gather(tf.reshape(self.transitions, [-1]),
                                         self.targets_transition)

            # real score
            self.target_path_score = tf.reduce_sum(
                self.point_score) + tf.reduce_sum(self.trans_score)

            # all path score
            self.total_path_score, self.max_scores, self.max_scores_pre = self.forward(
                self.observations, self.transitions, self.length)

            # loss
            self.loss = -(self.target_path_score - self.total_path_score)

        # summary
        self.train_summary = tf.scalar_summary("loss", self.loss)
        self.val_summary = tf.scalar_summary("loss", self.loss)

        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.loss)
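
# --- Note: a small NumPy illustration (not part of the model above) of the
# length trick used here: summing tf.sign(inputs) along each row counts the
# non-zero token ids, which assumes id 0 is reserved for padding.
import numpy as np

padded_batch = np.array([[4, 7, 2, 0, 0],
                         [9, 1, 0, 0, 0]])
lengths = np.sign(padded_batch).sum(axis=1)
print(lengths)  # [3 2] -- one true sequence length per sample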
Example 32
def shared_layer(input_data, config, is_training):
    """Build the model to decoding

    Args:
        input_data = size batch_size X num_steps X embedding size

    Returns:
        output units
    """

    if config.bidirectional == True:
        if config.lstm == True:
            cell_fw = rnn_cell.BasicLSTMCell(config.encoder_size,
                                             forget_bias=1.0)
            cell_bw = rnn_cell.BasicLSTMCell(config.encoder_size,
                                             forget_bias=1.0)
        else:
            cell_fw = rnn_cell.GRUCell(config.encoder_size)
            cell_bw = rnn_cell.GRUCell(config.encoder_size)

        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, config.num_steps, input_data)
        ]

        if is_training and config.keep_prob < 1:
            cell_fw = rnn_cell.DropoutWrapper(
                cell_fw, output_keep_prob=config.keep_prob)
            cell_bw = rnn_cell.DropoutWrapper(
                cell_bw, output_keep_prob=config.keep_prob)

        cell_fw = rnn_cell.MultiRNNCell([cell_fw] * config.num_shared_layers)
        cell_bw = rnn_cell.MultiRNNCell([cell_bw] * config.num_shared_layers)

        initial_state_fw = cell_fw.zero_state(config.batch_size, tf.float32)
        initial_state_bw = cell_bw.zero_state(config.batch_size, tf.float32)

        encoder_outputs, _, _ = rnn.bidirectional_rnn(
            cell_fw,
            cell_bw,
            inputs,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            scope="encoder_rnn")

    else:
        if config.lstm == True:
            cell = rnn_cell.BasicLSTMCell(config.encoder_size)
        else:
            cell = rnn_cell.GRUCell(config.encoder_size)

        inputs = [
            tf.squeeze(input_, [1])
            for input_ in tf.split(1, config.num_steps, input_data)
        ]

        if is_training and config.keep_prob < 1:
            cell = rnn_cell.DropoutWrapper(cell,
                                           output_keep_prob=config.keep_prob)

        cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

        initial_state = cell.zero_state(config.batch_size, tf.float32)

        encoder_outputs, encoder_states = rnn.rnn(cell,
                                                  inputs,
                                                  initial_state=initial_state,
                                                  scope="encoder_rnn")

    return encoder_outputs
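
# --- Note: a NumPy sketch (illustration only) of the tf.split + tf.squeeze idiom
# used above to turn a (batch_size, num_steps, embed_size) tensor into the list
# of num_steps (batch_size, embed_size) slices that the old rnn.* functions expect.
import numpy as np

batch_size, num_steps, embed_size = 2, 3, 4
input_data = np.random.randn(batch_size, num_steps, embed_size)
inputs = [np.squeeze(s, axis=1) for s in np.split(input_data, num_steps, axis=1)]
print(len(inputs), inputs[0].shape)  # 3 (2, 4)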
Example 33
    def __init__(self, label_size, vocab_size, data_x_seq, data_x_ep, data_y, ep_pattern_map, FLAGS):
        self.ep_pattern_map = ep_pattern_map
        self.label_size = label_size
        self.vocab_size = vocab_size
        self.FLAGS = FLAGS

        # shuffle data
        zipped_data = zip(data_x_seq, data_x_ep, data_y)
        shuffle(zipped_data)
        data_x_seq, data_x_ep, data_y = zip(*zipped_data)

        # convert data to numpy arrays - labels must be dense one-hot vectors
        dense_y = []
        for epoch, j in enumerate(data_y):
            dense_y.append([0] * label_size)
            dense_y[epoch][j] = 1
        data_x_seq, data_x_ep, data_y = np.array(data_x_seq), np.array(data_x_ep), np.array(dense_y)
        self.train_x, self.dev_x = data_x_seq[:-FLAGS.dev_samples], data_x_seq[-FLAGS.dev_samples:]
        self.train_x_ep, self.dev_x_ep = data_x_ep[:-FLAGS.dev_samples], data_x_ep[-FLAGS.dev_samples:]
        self.train_y, self.dev_y = data_y[:-FLAGS.dev_samples], data_y[-FLAGS.dev_samples:]

        # set up graph
        with tf.device('/gpu:'+str(FLAGS.gpuid)):
            self.is_training = tf.placeholder(tf.bool)
            self.batch_size = tf.placeholder(tf.float32)
            self.input_x = tf.placeholder(tf.int32, [None, FLAGS.seq_len], name="input_x")
            self.input_y = tf.placeholder(tf.float32, [None, label_size], name="input_y")
            self.state = tf.placeholder(tf.float32)


            with tf.device('/cpu:0'):
                lookup_table = tf.Variable(tf.random_uniform([vocab_size, FLAGS.word_dim], -1.0, 1.0))
                inputs = tf.nn.embedding_lookup(lookup_table, self.input_x)
            inputs = tf.nn.dropout(inputs, 1 - FLAGS.dropout)
            inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, FLAGS.seq_len, inputs)]

            lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=FLAGS.hidden_dim, input_size=FLAGS.word_dim)
            if self.is_training and 1 - FLAGS.dropout < 1:
                lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=1 - FLAGS.dropout)
            cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * FLAGS.num_layers)
            if FLAGS.bi:
                back_cell = tf.nn.rnn_cell.LSTMCell(num_units=FLAGS.hidden_dim, input_size=FLAGS.word_dim)
                if self.is_training and 1 - FLAGS.dropout < 1:
                    back_cell = tf.nn.rnn_cell.DropoutWrapper(back_cell, output_keep_prob=1 - FLAGS.dropout)
                back_cell = tf.nn.rnn_cell.MultiRNNCell([back_cell] * FLAGS.num_layers)
                outputs = rnn.bidirectional_rnn(cell, back_cell, inputs, dtype=tf.float32)
                state = outputs[-1] + outputs[len(outputs)/2]
            else:
                outputs, state = rnn.rnn(cell, inputs, dtype=tf.float32)

            # lstm returns [hiddenstate+cell] -- extract just the hidden state
            self._state = tf.slice(state, [0, 0], tf.cast(tf.pack([self.batch_size, FLAGS.hidden_dim]), tf.int32))
            softmax_w = tf.get_variable("softmax_w", [FLAGS.hidden_dim, label_size])
            softmax_b = tf.get_variable("softmax_b", [label_size])

            self._logits = tf.nn.xw_plus_b(self.state, softmax_w, softmax_b, name="logits")
            # training loss
            loss = tf.nn.softmax_cross_entropy_with_logits(self._logits, self.input_y)
            self._cost = tf.reduce_sum(loss) / self.batch_size

            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self._cost, tvars, aggregation_method=2), FLAGS.max_grad_norm)
            optimizer = tf.train.AdamOptimizer(FLAGS.lr)
            self._train_op = optimizer.apply_gradients(zip(grads, tvars))

            # eval
            correct_prediction = tf.equal(tf.argmax(self._logits, 1), tf.argmax(self.input_y, 1))
            self._accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Example 34
    def __init__(self,
                 vocab_size,
                 embedding_size,
                 learning_rate,
                 learning_rate_decay_op,
                 memory_hops,
                 dropout_rate,
                 q_depth,
                 a_depth,
                 episodic_m_depth,
                 ep_depth,
                 m_input_size,
                 attention_ff_l1_size,
                 max_gradient_norm,
                 maximum_story_length=100,
                 maximum_question_length=20,
                 use_lstm=False,
                 forward_only=False):

        # initialization
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = tf.Variable(
            float(learning_rate_decay_op), trainable=False)
        self.dropout_rate = dropout_rate
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.q_depth = q_depth  # question RNN depth
        self.a_depth = a_depth  # answer RNN depth
        self.m_depth = episodic_m_depth  # memory cell depth
        self.ep_depth = ep_depth  # episodic depth
        self.max_gradient_norm = max_gradient_norm
        self.memory_hops = memory_hops  # number of episodic memory pass
        self.m_input_size = m_input_size
        self.m_size = embedding_size  # memory cell size
        self.a_size = embedding_size  # answer RNN size

        self.attention_ff_l1_size = attention_ff_l1_size
        # attention_ff_l2_size

        print("[*] Creating Dynamic Memory Network ...")

        # question module
        def seq2seq_fq(encoder_inputs, cell, mask=None):
            return seq2seq.sentence_embedding_rnn_q(encoder_inputs,
                                                    self.vocab_size, cell,
                                                    self.embedding_size, mask)

        def seq2seq_fs(encoder_inputs, cell, mask=None):
            return seq2seq.sentence_embedding_rnn_s(encoder_inputs,
                                                    self.vocab_size, cell,
                                                    self.embedding_size, mask)

        # attention gate in episodic
        # TODO: force gate logits to be sparse, add L1 norm regularization

        # Sentence token placeholder
        self.story = []
        for i in range(maximum_story_length):
            self.story.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="story{0}".format(i)))
        self.story_mask = tf.placeholder(tf.int32,
                                         shape=[None],
                                         name="story_mask")
        self.story_len = tf.placeholder(tf.int32,
                                        shape=[],
                                        name="story length")
        print(self.story_len)
        self.question = []
        for i in range(maximum_question_length):
            self.question.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="question{0}".format(i)))
        self.answer = tf.placeholder(tf.int64, name="answer")

        # self.story_len = 1#= tf.reshape(tf.shape(self.story_mask), [])
        # TODO: fixed lens problem
        #self.story_len = 5

        # configuration of attention gate

        # print (self.story)

        with tf.variable_scope("answer"):
            softmax_weights = tf.Variable(tf.truncated_normal(
                [self.a_size, self.vocab_size], -0.1, 0.1),
                                          name="softmax_weights")
            softmax_biases = tf.Variable(tf.zeros([self.vocab_size]),
                                         name="softmax_biases")

        answer_weights = tf.Variable(tf.truncated_normal(
            [self.m_size, self.a_size], -0.1, 0.1),
                                     name="answer_weights")
        answer_biases = tf.Variable(tf.zeros([self.a_size]),
                                    name="answer_biases")

        #------------ question module ------------
        single_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
        if use_lstm:
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)
        if not forward_only and dropout_rate < 1:
            single_cell = tf.nn.rnn_cell.DropoutWrapper(
                single_cell, output_keep_prob=dropout_rate)
        question_cell = single_cell
        if q_depth > 1:
            question_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] *
                                                        q_depth)
        question = seq2seq_fq(self.question, question_cell)
        self.question_state = question[0]
        #for e in question:

        #------------ Input module ------------
        reader_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
        if use_lstm:
            reader_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)
        if not forward_only and dropout_rate < 1:
            reader_cell = tf.nn.rnn_cell.DropoutWrapper(
                reader_cell, output_keep_prob=dropout_rate)
        # Embed tokens into vectors, feed them through the rnn cell, and return the cell states
        fusion_fw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
        fusion_bw_cell = tf.nn.rnn_cell.GRUCell(self.embedding_size)
        if use_lstm:
            fusion_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)
            fusion_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(self.embedding_size)

        if not forward_only and dropout_rate < 1:
            fusion_fw_cell = tf.nn.rnn_cell.DropoutWrapper(
                fusion_fw_cell, output_keep_prob=dropout_rate)
            fusion_bw_cell = tf.nn.rnn_cell.DropoutWrapper(
                fusion_bw_cell, output_keep_prob=dropout_rate)

        (_facts, _,
         _) = rnn.bidirectional_rnn(fusion_fw_cell,
                                    fusion_bw_cell,
                                    seq2seq_fs(self.story, reader_cell),
                                    dtype=tf.float32)

        self.facts = _facts[0]

        #------------ episodic memory module ------------
        # TODO: use self.facts to extract ep_size
        self.ep_size = 2 * self.embedding_size  # episodic cell size
        # construct memory cell
        #single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.m_size)
        mem_cell = cell.MemCell(self.m_size)
        #mem_cell = tf.nn.rnn_cell.GRUCell(self.m_size)
        self.episodic_array = tf.Variable(tf.random_normal([1, 1]))

        # construct episodic_cell

        # for i in xrange(self.memory_hops):
        single_cell = cell.MGRUCell(self.ep_size)
        ep_cell = cell.MultiMGRUCell([single_cell] * ep_depth)

        e = []
        mem_state = self.question_state
        q_double = tf.concat(1, [self.question_state, self.question_state])
        mem_state_double = tf.concat(1, [mem_state, mem_state])

        # TODO change z_dim to be
        z_dim = self.embedding_size * 8
        self.attention_ff_size = z_dim
        self.attention_ff_l2_size = 1
        # self._ep_initial_state = []
        # for _cell in range(ep_cell)
        # 	self._ep_initial_state.append = _cell.zero_state(1, tf.float32)	# TODO change batch size

        # initialize parameters
        with tf.variable_scope("episodic"):
            # parameters of attention gate
            l1_weights = tf.Variable(tf.truncated_normal(
                [self.attention_ff_size, self.attention_ff_l1_size], -0.1,
                0.1),
                                     name="l1_weights")
            l1_biases = tf.Variable(tf.zeros([self.attention_ff_l1_size]),
                                    name="l1_biases")
            l2_weights = tf.Variable(tf.truncated_normal(
                [self.attention_ff_l1_size, self.attention_ff_l2_size], -0.1,
                0.1),
                                     name="l2_weights")
            l2_biases = tf.Variable(tf.zeros([self.attention_ff_l2_size]),
                                    name="l2_biases")
            # parameters of the episodic memory
            mem_weights = tf.Variable(tf.truncated_normal(
                [self.m_input_size, self.m_size], -0.1, 0.1),
                                      name="mem_weights")
            mem_biases = tf.Variable(tf.zeros([self.m_size]),
                                     name="mem_biases")

        # initializing variable of feedforward nn
        seq2seq.def_feedforward_nn(self.attention_ff_size,
                                   self.attention_ff_l1_size,
                                   self.attention_ff_l2_size)

        for hops in xrange(self.memory_hops):
            # gate attention network
            step = tf.constant(0)
            tf.while_loop(
                lambda step, story_len, facts, q_double, mem_state_double:
                    tf.less(step, story_len),
                lambda step, story_len, facts, q_double, mem_state_double:
                    self.mem_body(step, story_len, facts, q_double,
                                  mem_state_double),
                [step, self.story_len, self.facts, q_double, mem_state_double])

            #self.episodic_gate = tf.reshape(tf.nn.softmax(self.episodic_array),[1])
            self.episodic_gate = tf.nn.softmax(
                tf.reshape(self.episodic_array, [1, -1]))
            print("episodic_gate", self.episodic_gate)

            # attention GRU
            # output, context = cell.rnn(ep_cell[hops], [self.facts], self.episodic_gate, scope="epsodic", dtype=tf.float32)
            output, context = cell.rnn_ep(ep_cell, [self.facts],
                                          self.episodic_gate,
                                          dtype=tf.float32,
                                          scope="episodic")
            e.append(output)
            # memory updates
            #_, mem_state = mem_cell(context_state, mem_state)	# GRU
            #_, mem_state = cell.rnn_mem(mem_cell, [context], self.question_state, mem_state, self.m_input_size, self.m_size, dtype=tf.float32)
            mem_state = mem_cell(context, self.question_state, mem_state,
                                 self.m_input_size, self.m_size)

            # if the attention focuses on the last element of e, the episodic pass is over
            if np.argmax(np.asarray(e[-1])) == len(e[-1]) - 1:
                break

        #------------ answer ------------
        # TODO: use decoder sequence to generate answer
        answer_steps = 1
        single_cell = tf.nn.rnn_cell.GRUCell(self.a_size)
        answer_cell = single_cell
        if a_depth > 1:
            answer_cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * a_depth)

        a_state = mem_state
        for step in range(answer_steps):
            y = tf.nn.softmax(tf.matmul(a_state, answer_weights))
            (answer,
             a_state) = answer_cell(tf.concat(1, [self.question_state, y]),
                                    a_state)
            #(answer, a_state) = answer_cell(tf.concat(1, [question, mem_state]), a_state)

        self.logits = tf.nn.softmax(
            tf.matmul(answer, softmax_weights) + softmax_biases)
        answer = tf.reshape(tf.one_hot(self.answer, self.vocab_size, 1.0, 0.0),
                            [1, self.vocab_size])
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(self.logits, answer))

        params = tf.trainable_variables()
        # testing
        for e in params:
            print(e.get_shape(), e.name, type(e))
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
            gradients = tf.gradients(self.loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            self.gradient_norms = norm
            self.updates = optimizer.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Example 35
    cell_fw = rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
    initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)

with tf.name_scope("Cell_bw") as scope:
    #Define one cell, stack the cell to obtain many layers of cell and wrap a DropOut
    cell_bw = rnn_cell.BasicLSTMCell(hidden_size)
    cell_bw = rnn_cell.MultiRNNCell([cell_bw] * num_layers)
    cell_bw = rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=keep_prob)
    initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)

with tf.name_scope("RNN") as scope:
    # Thanks to Tensorflow, the entire decoder is just one line of code:
    #outputs, states = seq2seq.rnn_decoder(inputs, initial_state, cell_fw)
    outputs, _, _ = rnn.bidirectional_rnn(cell_fw,
                                          cell_bw,
                                          inputs,
                                          initial_state_fw=initial_state_fw,
                                          initial_state_bw=initial_state_bw,
                                          dtype=tf.float32)
    outputs_tensor = tf.concat(0, outputs)
    final = outputs[-1]

with tf.name_scope("Mark") as scope:
    W_m = tf.Variable(tf.random_normal([2 * hidden_size, 1], stddev=0.01))
    b_m = tf.Variable(tf.random_normal([1], stddev=0.01))
    h_m = tf.matmul(outputs_tensor, W_m) + b_m
    h_mark = tf.reshape(h_m, (seq_len, batch_size))
    h_markt = tf.transpose(h_mark)
    sm_mark = tf.nn.softmax(h_markt)
    cost_mark = tf.nn.sparse_softmax_cross_entropy_with_logits(h_markt, marks)
    loss_mark = tf.reduce_mean(cost_mark)
Example 36
def lm_private(encoder_units, pos_prediction, chunk_prediction, config,
               is_training):
    """Decode model for lm

    Args:
        encoder_units - these are the encoder units:
        [batch_size X encoder_size] with the one the pos prediction
        pos_prediction:
        must be the same size as the encoder_size

    returns:
        logits
    """
    # concatenate the encoder_units and the pos_prediction

    pos_prediction = tf.reshape(
        pos_prediction,
        [config.batch_size, config.num_steps, config.pos_embedding_size])
    chunk_prediction = tf.reshape(
        chunk_prediction,
        [config.batch_size, config.num_steps, config.chunk_embedding_size])
    lm_inputs = tf.concat(2, [chunk_prediction, pos_prediction, encoder_units])

    with tf.variable_scope("lm_decoder"):
        if config.bidirectional == True:
            if config.lstm == True:
                cell_fw = rnn_cell.BasicLSTMCell(config.lm_decoder_size,
                                                 forget_bias=1.0)
                cell_bw = rnn_cell.BasicLSTMCell(config.lm_decoder_size,
                                                 forget_bias=1.0)
            else:
                cell_fw = rnn_cell.GRUCell(config.lm_decoder_size)
                cell_bw = rnn_cell.GRUCell(config.lm_decoder_size)

            if is_training and config.keep_prob < 1:
                cell_fw = rnn_cell.DropoutWrapper(
                    cell_fw, output_keep_prob=config.keep_prob)
                cell_bw = rnn_cell.DropoutWrapper(
                    cell_bw, output_keep_prob=config.keep_prob)

            cell_fw = rnn_cell.MultiRNNCell([cell_fw] *
                                            config.num_shared_layers)
            cell_bw = rnn_cell.MultiRNNCell([cell_bw] *
                                            config.num_shared_layers)

            initial_state_fw = cell_fw.zero_state(config.batch_size,
                                                  tf.float32)
            initial_state_bw = cell_bw.zero_state(config.batch_size,
                                                  tf.float32)

            # this function puts the 3d tensor into a 2d tensor: batch_size x input size
            inputs = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, config.num_steps, lm_inputs)
            ]

            decoder_outputs, _, _ = rnn.bidirectional_rnn(
                cell_fw,
                cell_bw,
                inputs,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                scope="lm_rnn")
            output = tf.reshape(tf.concat(1, decoder_outputs),
                                [-1, 2 * config.lm_decoder_size])
            softmax_w = tf.get_variable(
                "softmax_w", [2 * config.lm_decoder_size, config.vocab_size])
        else:
            if config.lstm == True:
                cell = rnn_cell.BasicLSTMCell(config.lm_decoder_size)
            else:
                cell = rnn_cell.GRUCell(config.lm_decoder_size)

            if is_training and config.keep_prob < 1:
                cell = rnn_cell.DropoutWrapper(
                    cell, output_keep_prob=config.keep_prob)

            cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

            initial_state = cell.zero_state(config.batch_size, tf.float32)

            # this function puts the 3d tensor into a 2d tensor: batch_size x input size
            inputs = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, config.num_steps, lm_inputs)
            ]

            decoder_outputs, decoder_states = rnn.rnn(
                cell, inputs, initial_state=initial_state, scope="lm_rnn")

            output = tf.reshape(tf.concat(1, decoder_outputs),
                                [-1, config.lm_decoder_size])
            softmax_w = tf.get_variable(
                "softmax_w", [config.lm_decoder_size, config.vocab_size])

        softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        l2_penalty = tf.reduce_sum(tf.square(output))

    return logits, l2_penalty
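
# --- Note: a NumPy sketch (illustration only) of the shape bookkeeping behind
# tf.reshape(tf.concat(1, decoder_outputs), [-1, 2 * lm_decoder_size]): the
# num_steps per-step outputs of shape (batch, 2*size) are flattened into
# (batch*num_steps, 2*size) so one softmax matrix scores every position at once.
import numpy as np

batch, num_steps, size = 2, 3, 5
decoder_outputs = [np.random.randn(batch, 2 * size) for _ in range(num_steps)]
flat = np.concatenate(decoder_outputs, axis=1).reshape(-1, 2 * size)
print(flat.shape)  # (6, 10) == (batch * num_steps, 2 * size)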
Example 37
def pos_private(encoder_units, config, is_training):
    """Decode model for pos

    Args:
        encoder_units - these are the encoder units
        num_pos - the number of pos tags there are (output units)

    returns:
        logits
    """
    with tf.variable_scope("pos_decoder"):
        if config.bidirectional == True:
            if config.lstm == True:
                cell_fw = rnn_cell.BasicLSTMCell(config.pos_decoder_size,
                                              forget_bias=1.0)
                cell_bw = rnn_cell.BasicLSTMCell(config.pos_decoder_size,
                                              forget_bias=1.0)
            else:
                cell_fw = rnn_cell.GRUCell(config.pos_decoder_size)
                cell_bw = rnn_cell.GRUCell(config.pos_decoder_size)

            if is_training and config.keep_prob < 1:
                cell_fw = rnn_cell.DropoutWrapper(
                    cell_fw, output_keep_prob=config.keep_prob)
                cell_bw = rnn_cell.DropoutWrapper(
                    cell_bw, output_keep_prob=config.keep_prob)

            cell_fw = rnn_cell.MultiRNNCell([cell_fw] * config.num_shared_layers)
            cell_bw = rnn_cell.MultiRNNCell([cell_bw] * config.num_shared_layers)

            initial_state_fw = cell_fw.zero_state(config.batch_size, tf.float32)
            initial_state_bw = cell_bw.zero_state(config.batch_size, tf.float32)

            # puts it into batch_size X input_size
            inputs = [tf.squeeze(input_, [1])
                      for input_ in tf.split(1, config.num_steps,
                                             encoder_units)]

            decoder_outputs, _, _ = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs,
                                                      initial_state_fw=initial_state_fw,
                                                      initial_state_bw=initial_state_bw,
                                                      scope="pos_rnn")

            output = tf.reshape(tf.concat(1, decoder_outputs),
                                [-1, 2*config.pos_decoder_size])

            softmax_w = tf.get_variable("softmax_w",
                                        [2*config.pos_decoder_size,
                                         config.num_pos_tags])
        else:
            if config.lstm == True:
                cell = rnn_cell.BasicLSTMCell(config.pos_decoder_size,
                                          forget_bias=1.0)
            else:
                cell = rnn_cell.GRUCell(config.pos_decoder_size)

            if is_training and config.keep_prob < 1:
                cell = rnn_cell.DropoutWrapper(
                    cell, output_keep_prob=config.keep_prob)

            cell = rnn_cell.MultiRNNCell([cell] * config.num_shared_layers)

            initial_state = cell.zero_state(config.batch_size, tf.float32)

            # puts it into batch_size X input_size
            inputs = [tf.squeeze(input_, [1])
                      for input_ in tf.split(1, config.num_steps,
                                             encoder_units)]

            decoder_outputs, decoder_states = rnn.rnn(cell, inputs,
                                                      initial_state=initial_state,
                                                      scope="pos_rnn")
            output = tf.reshape(tf.concat(1, decoder_outputs),
                                [-1, config.pos_decoder_size])

            softmax_w = tf.get_variable("softmax_w",
                                        [config.pos_decoder_size,
                                         config.num_pos_tags])

        softmax_b = tf.get_variable("softmax_b", [config.num_pos_tags])
        logits = tf.matmul(output, softmax_w) + softmax_b
        l2_penalty = tf.reduce_sum(tf.square(output))

    return logits, l2_penalty
  def __init__(self, is_training, vocab_size, tag_size, maxlen):
    self._batch_size = FLAGS.batch_size
    self._hidden_size = FLAGS.hidden_size
    self._num_layers = FLAGS.num_layers
    self._dropout_keep_prob = FLAGS.dropout_keep_prob
    self._vocab_size = vocab_size
    self._tag_size = tag_size
    self._is_training = is_training

    self._input_data = tf.placeholder(tf.int32, [self._batch_size, maxlen])
    self._targets = tf.placeholder(tf.int32, [self._batch_size, maxlen])
    self._mask = tf.placeholder(tf.bool, [self._batch_size, maxlen])

    lstm_cell = tf.nn.rnn_cell.LSTMCell(self._hidden_size, self._hidden_size)
    if is_training and self._dropout_keep_prob < 1:
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=self._dropout_keep_prob)

    cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * self._num_layers)
    cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * self._num_layers)

    self._initial_state_fw = cell_fw.zero_state(self._batch_size, tf.float32)
    self._initial_state_bw = cell_bw.zero_state(self._batch_size, tf.float32)

    with tf.device("/cpu:0"):
      self._embedding = tf.get_variable("embedding", [self._vocab_size,
                                                      self._hidden_size])
      inputs = tf.nn.embedding_lookup(self._embedding, self._input_data)

    inputs = [input_ for input_ in tf.unpack(tf.transpose(inputs, [1, 0, 2]))]
    if is_training and self._dropout_keep_prob < 1:
        inputs = tf.nn.dropout(tf.pack(inputs), self._dropout_keep_prob)
        inputs = tf.unpack(inputs)
    outputs = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs,
                                    initial_state_fw=self._initial_state_fw,
                                    initial_state_bw=self._initial_state_bw)
    # output from forward and backward cells.
    output = tf.reshape(tf.concat(1, outputs), [-1, 2 * self._hidden_size])
    softmax_w = tf.get_variable("softmax_w", [2 * self._hidden_size, self._tag_size])
    softmax_b = tf.get_variable("softmax_b", [self._tag_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.nn.seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.reshape(tf.cast(self._mask, tf.float32), [-1])], self._tag_size)
    self._cost = cost = tf.reduce_sum(loss) / self._batch_size

    equality = tf.equal(tf.argmax(logits, 1),
                        tf.cast(tf.reshape(self._targets, [-1]), tf.int64))
    masked = tf.boolean_mask(equality, tf.reshape(self.mask, [-1]))
    self._misclass = 1 - tf.reduce_mean(tf.cast(masked, tf.float32))

    if not is_training:
      return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      FLAGS.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
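
# --- Note: a NumPy view (illustration only) of the masked error rate computed
# above: tf.boolean_mask drops padded positions before averaging correctness.
import numpy as np

equality = np.array([True, False, True, True, False, True])
mask = np.array([True, True, True, False, False, False])
masked = equality[mask]  # keep only real (unpadded) positions
misclass = 1.0 - masked.astype(np.float32).mean()
print(misclass)  # ~0.333: one wrong prediction out of the three kept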
  cell_fw = rnn_cell.MultiRNNCell([cell_fw] * num_layers)
  cell_fw = rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
  initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)

with tf.name_scope("Cell_bw") as scope:
  #Define one cell, stack the cell to obtain many layers of cell and wrap a DropOut
  cell_bw = rnn_cell.BasicLSTMCell(hidden_size)
  cell_bw = rnn_cell.MultiRNNCell([cell_bw] * num_layers)
  cell_bw = rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=keep_prob)
  initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)

with tf.name_scope("RNN") as scope:
  # Thanks to Tensorflow, the entire decoder is just one line of code:
  #outputs, states = seq2seq.rnn_decoder(inputs, initial_state, cell_fw)
  outputs, _, _ = rnn.bidirectional_rnn(cell_fw, cell_bw, inputs,
                      initial_state_fw=initial_state_fw, initial_state_bw=initial_state_bw,
                      dtype=tf.float32)
  outputs_tensor = tf.concat(0, outputs)
  final = outputs[-1]
  
with tf.name_scope("Mark") as scope:
  W_m = tf.Variable(tf.random_normal([2 * hidden_size, 1], stddev=0.01))
  b_m = tf.Variable(tf.random_normal([1], stddev=0.01))
  h_m = tf.matmul(outputs_tensor, W_m) + b_m
  h_mark = tf.reshape(h_m, (seq_len, batch_size))
  h_markt = tf.transpose(h_mark)
  sm_mark = tf.nn.softmax(h_markt)
  cost_mark = tf.nn.sparse_softmax_cross_entropy_with_logits(h_markt, marks)
  loss_mark = tf.reduce_mean(cost_mark)

with tf.name_scope("Output") as scope:
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn': cell_fn = jzRNNCell
        elif args.model == 'gru': cell_fn = jzGRUCell
        elif args.model == 'lstm': cell_fn = jzLSTMCell
        else: raise Exception("model type not supported: {}".format(args.model))

        if args.activation == 'tanh': cell_af = tf.tanh
        elif args.activation == 'sigmoid': cell_af = tf.sigmoid
        elif args.activation == 'relu': cell_af = tf.nn.relu
        else: raise Exception("activation function not supported: {}".format(args.activation))

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        with tf.variable_scope('rnnlm'):
            if not args.bidirectional:
                softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            else:
                softmax_w = tf.get_variable("softmax_w", [args.rnn_size*2, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.nn.dropout(tf.squeeze(input_, [1]),args.dropout) for input_ in inputs]

        # one-directional RNN (nothing changed here..)
        if not args.bidirectional:
            cell = cell_fn(args.rnn_size,activation=cell_af)
            self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
            self.initial_state = cell.zero_state(args.batch_size, tf.float32)
            def loop(prev, _):
                prev = tf.matmul(prev, softmax_w) + softmax_b
                prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
                return tf.nn.embedding_lookup(embedding, prev_symbol)
            outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
            output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

        # bi-directional RNN
        else:
            lstm_fw = cell_fn(args.rnn_size,activation=cell_af)
            lstm_bw = cell_fn(args.rnn_size,activation=cell_af)
            self.lstm_fw = lstm_fw = rnn_cell.MultiRNNCell([lstm_fw]*args.num_layers)
            self.lstm_bw = lstm_bw = rnn_cell.MultiRNNCell([lstm_bw]*args.num_layers)
            self.initial_state_fw = lstm_fw.zero_state(args.batch_size,tf.float32)
            self.initial_state_bw = lstm_bw.zero_state(args.batch_size,tf.float32)
            outputs,_,_ = rnn.bidirectional_rnn(lstm_fw, lstm_bw, inputs,
                                            initial_state_fw=self.initial_state_fw,
                                            initial_state_bw=self.initial_state_bw,
                                                sequence_length=args.batch_size) 
            output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size*2])

        self.logits = tf.matmul(tf.nn.dropout(output,args.dropout), softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    def __init__(self, num_chars, num_classes, num_steps=200, num_epochs=100, embedding_matrix=None, is_training=True, is_crf=True, weight=False):
        # Parameter
        self.max_f1 = 0
        self.learning_rate = 0.002
        self.dropout_rate = 0.5
        self.batch_size = 128
        self.num_layers = 1   
        self.emb_dim = 100
        self.hidden_dim = 100
        self.num_epochs = num_epochs
        self.num_steps = num_steps
        self.num_chars = num_chars
        self.num_classes = num_classes
        
        # placeholder of x, y and weight
        self.inputs = tf.placeholder(tf.int32, [None, self.num_steps])
        self.targets = tf.placeholder(tf.int32, [None, self.num_steps])
        self.targets_weight = tf.placeholder(tf.float32, [None, self.num_steps])
        self.targets_transition = tf.placeholder(tf.int32, [None])
        
        # char embedding
        if embedding_matrix is not None:
            self.embedding = tf.Variable(embedding_matrix, trainable=False, name="emb", dtype=tf.float32)
        else:
            self.embedding = tf.get_variable("emb", [self.num_chars, self.emb_dim])
        self.inputs_emb = tf.nn.embedding_lookup(self.embedding, self.inputs)
        self.inputs_emb = tf.transpose(self.inputs_emb, [1, 0, 2])
        self.inputs_emb = tf.reshape(self.inputs_emb, [-1, self.emb_dim])
        self.inputs_emb = tf.split(0, self.num_steps, self.inputs_emb)

        # lstm cell
        lstm_cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)
        lstm_cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim)

        # dropout
        if is_training:
            lstm_cell_fw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_fw, output_keep_prob=(1 - self.dropout_rate))
            lstm_cell_bw = tf.nn.rnn_cell.DropoutWrapper(lstm_cell_bw, output_keep_prob=(1 - self.dropout_rate))

        lstm_cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_fw] * self.num_layers)
        lstm_cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_cell_bw] * self.num_layers)

        # get the length of each sample
        self.length = tf.reduce_sum(tf.sign(self.inputs), reduction_indices=1)
        self.length = tf.cast(self.length, tf.int32)  
        
        # forward and backward
        self.outputs, _, _ = rnn.bidirectional_rnn(
            lstm_cell_fw, 
            lstm_cell_bw,
            self.inputs_emb, 
            dtype=tf.float32,
            sequence_length=self.length
        )
        
        # softmax
        self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, self.hidden_dim * 2])
        self.softmax_w = tf.get_variable("softmax_w", [self.hidden_dim * 2, self.num_classes])
        self.softmax_b = tf.get_variable("softmax_b", [self.num_classes])
        self.logits = tf.matmul(self.outputs, self.softmax_w) + self.softmax_b

        if not is_crf:
            pass
        else:
            self.tags_scores = tf.reshape(self.logits, [self.batch_size, self.num_steps, self.num_classes])
            self.transitions = tf.get_variable("transitions", [self.num_classes + 1, self.num_classes + 1])
            
            dummy_val = -1000
            class_pad = tf.Variable(dummy_val * np.ones((self.batch_size, self.num_steps, 1)), dtype=tf.float32)
            self.observations = tf.concat(2, [self.tags_scores, class_pad])

            begin_vec = tf.Variable(np.array([[dummy_val] * self.num_classes + [0] for _ in range(self.batch_size)]), trainable=False, dtype=tf.float32)
            end_vec = tf.Variable(np.array([[0] + [dummy_val] * self.num_classes for _ in range(self.batch_size)]), trainable=False, dtype=tf.float32) 
            begin_vec = tf.reshape(begin_vec, [self.batch_size, 1, self.num_classes + 1])
            end_vec = tf.reshape(end_vec, [self.batch_size, 1, self.num_classes + 1])

            self.observations = tf.concat(1, [begin_vec, self.observations, end_vec])

            self.mask = tf.cast(tf.reshape(tf.sign(self.targets),[self.batch_size * self.num_steps]), tf.float32)
            
            # point score
            self.point_score = tf.gather(tf.reshape(self.tags_scores, [-1]), tf.range(0, self.batch_size * self.num_steps) * self.num_classes + tf.reshape(self.targets,[self.batch_size * self.num_steps]))
            self.point_score *= self.mask
            
            # transition score
            self.trans_score = tf.gather(tf.reshape(self.transitions, [-1]), self.targets_transition)
            
            # real score
            self.target_path_score = tf.reduce_sum(self.point_score) + tf.reduce_sum(self.trans_score)
                        
            # tf.initialize_all_variables()
            # sess = tf.Session()
            # sess.run(self.transitions.eval())

            # all path score
            self.total_path_score, self.max_scores, self.max_scores_pre  = self.forward(self.observations, self.transitions, self.length)
            
            # loss
            self.loss = - (self.target_path_score - self.total_path_score)
        
        # summary
        self.train_summary = tf.scalar_summary("loss", self.loss)
        self.val_summary = tf.scalar_summary("loss", self.loss)        
        
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 
Example 42
    def __init__(self, config):
        sent_len = self.sent_len = config.sent_len
        word_len = config.word_len
        batch_size = config.batch_size
        vocab_size = config.vocab_size
        embed_size = config.embed_size
        keep_prob1 = config.keep_prob1
        keep_prob2 = config.keep_prob2
        num_layers1 = config.num_layers1
        num_layers2 = config.num_layers2
        state_size1 = config.state_size1
        state_size2 = config.state_size2

        self.input_data = tf.placeholder(tf.int32, [batch_size*sent_len, word_len])
        self.lengths = tf.placeholder(tf.int64,[batch_size])
        self.wordlengths = tf.placeholder(tf.int64, [batch_size*sent_len])
        self.targets = tf.placeholder(tf.float32, [batch_size, 1])

        # Get embedding layer which requires CPU
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, embed_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        #LSTM 1 -> Encode the characters of every tok into a fixed dense representation
        with tf.variable_scope("rnn1", reuse=None):
            lstm_cell_1 = rnn_cell.LSTMCell(state_size1, input_size=embed_size)
            lstm_back_cell_1 = rnn_cell.LSTMCell(state_size1, input_size=embed_size)
            if keep_prob1 < 1:
                #Only on the inputs for rnn1. That way we don't dropout twice 
                lstm_cell_1 = rnn_cell.DropoutWrapper(
                  lstm_cell_1, input_keep_prob=keep_prob1)
                lstm_back_cell_1 = rnn_cell.DropoutWrapper(
                  lstm_back_cell_1, input_keep_prob=keep_prob1)

            cell_1 = rnn_cell.MultiRNNCell([lstm_cell_1] * num_layers1)
            backcell_1 = rnn_cell.MultiRNNCell([lstm_back_cell_1] * num_layers1)
            
            rnn_splits = [tf.squeeze(input_, [1]) for input_ in tf.split(1, word_len, inputs)]


            # Run the bidirectional rnn
            outputs1, last_fw_state1, last_bw_state1 = rnn.bidirectional_rnn(
                                                        cell_1, backcell_1, rnn_splits,
                                                        sequence_length=self.wordlengths,
                                                        dtype=tf.float32)

        #tok_embeds = outputs1[-1]
        tok_embeds = tf.concat(1, [last_fw_state1, last_bw_state1])
        
        with tf.variable_scope("rnn2", reuse=None):
            lstm_cell_2 = rnn_cell.LSTMCell(state_size2, input_size=state_size1*4)
            lstm_back_cell_2 = rnn_cell.LSTMCell(state_size2, input_size=state_size1*4)
            # Add dropout. NOTE: this applies to the input and output layers. The input here
            # is the token embedding produced by rnn1, so this also adds dropout to rnn1's output
            if keep_prob2 < 1:
                lstm_cell_2 = rnn_cell.DropoutWrapper(
                  lstm_cell_2, input_keep_prob=keep_prob2,
                             output_keep_prob=keep_prob2)
                lstm_back_cell_2 = rnn_cell.DropoutWrapper(
                  lstm_back_cell_2, input_keep_prob=keep_prob2,
                                  output_keep_prob=keep_prob2) 

            cell_2 = rnn_cell.MultiRNNCell([lstm_cell_2] * num_layers2)
            backcell_2 = rnn_cell.MultiRNNCell([lstm_back_cell_2] * num_layers2)

            # The rnn synthesis of the tokens has shape [batch_size*sent_len, state_size1*4]
            # we want it to be a list of sent_len tensors of [batch_size, state_size1*4]
            # We partition as [0,1,2,...n,0,1,2,...n...]
            rnn_inputs2 = tf.dynamic_partition(tok_embeds, list(range(sent_len))*batch_size, sent_len)
            

            #Sent level rnn
            outputs2, last_fw_state2, last_bw_state2 = rnn.bidirectional_rnn(cell_2, backcell_2, rnn_inputs2,
                                                                        sequence_length=self.lengths,
                                                                        dtype=tf.float32)
            #sent_embed = tf.reshape(tf.concat(1, [last_fw_state2, last_bw_state2]), [batch_size, state_size2*4])
            sent_embed = tf.concat(1, [last_fw_state2, last_bw_state2])

        with tf.variable_scope("linear", reuse=None):
            w = tf.get_variable("w", [state_size2*4, 1])
            b = tf.get_variable("b", [1])
            raw_logits = tf.matmul(sent_embed, w) + b 
        self.probabilities = tf.sigmoid(raw_logits)
        self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

        #Calculate gradients and propagate
        #Aggregation method 2 is really important for rnn per the tensorflow issues list
        tvars = tf.trainable_variables()
        self.lr = tf.Variable(0.0, trainable=False) #Assign to overwrite
        optimizer = tf.train.AdamOptimizer()
        grads, _vars = zip(*optimizer.compute_gradients(self.cost, tvars, aggregation_method=2))
        grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                      config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(grads, _vars))
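
# --- Note: a NumPy sketch (illustration only) of the tf.dynamic_partition
# regrouping above. tok_embeds is laid out sentence-major, and the partition
# ids list(range(sent_len)) * batch_size collect the i-th token of every
# sentence together, giving sent_len arrays of shape (batch_size, dim).
import numpy as np

batch_size, sent_len, dim = 2, 3, 4
tok_embeds = np.arange(batch_size * sent_len * dim).reshape(batch_size * sent_len, dim)
partitions = np.array(list(range(sent_len)) * batch_size)  # [0 1 2 0 1 2]
rnn_inputs2 = [tok_embeds[partitions == i] for i in range(sent_len)]
print(len(rnn_inputs2), rnn_inputs2[0].shape)  # 3 (2, 4)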
Example 43
    def __init__(self, config):
        sent_len = config.sent_len
        batch_size = config.batch_size
        vocab_size = config.vocab_size
        embed_size = config.embed_size
        num_layers = config.num_layers
        state_size = config.state_size
        keep_prob = config.keep_prob

        self.input_data = tf.placeholder(tf.int32, [batch_size, sent_len])
        self.lengths = tf.placeholder(tf.int64, [batch_size])
        self.targets = tf.placeholder(tf.float32, [batch_size, 1])

        # Get embedding layer which requires CPU
        with tf.device("/cpu:0"):
            embeding = tf.get_variable("embeding", [vocab_size, embed_size])
            inputs = tf.nn.embedding_lookup(embeding, self.input_data)

        #LSTM 1 -> Encode the characters of every tok into a fixed dense representation
        with tf.variable_scope("rnn1", reuse=None):
            cell = rnn_cell.LSTMCell(
                state_size,
                input_size=embed_size,
                initializer=tf.contrib.layers.xavier_initializer())
            back_cell = rnn_cell.LSTMCell(
                state_size,
                input_size=embed_size,
                initializer=tf.contrib.layers.xavier_initializer())
            cell = rnn_cell.DropoutWrapper(cell,
                                           input_keep_prob=keep_prob,
                                           output_keep_prob=keep_prob)
            back_cell = rnn_cell.DropoutWrapper(back_cell,
                                                input_keep_prob=keep_prob,
                                                output_keep_prob=keep_prob)
            cell = rnn_cell.MultiRNNCell([cell] * num_layers)
            backcell = rnn_cell.MultiRNNCell([back_cell] * num_layers)

            rnn_splits = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, sent_len, inputs)
            ]

            # Run the bidirectional rnn
            outputs, last_fw_state, last_bw_state = rnn.bidirectional_rnn(
                cell,
                backcell,
                rnn_splits,
                sequence_length=self.lengths,
                dtype=tf.float32)

        sent_out = tf.concat(1, [last_fw_state, last_bw_state])
        #sent_out = outputs[-1]
        #sent_out = tf.add_n(outputs)
        output_size = state_size * 4

        with tf.variable_scope("linear", reuse=None):
            w = tf.get_variable("w", [output_size, 1])
            b = tf.get_variable("b", [1],
                                initializer=tf.constant_initializer(0.0))
            raw_logits = tf.matmul(sent_out, w) + b
        self.probabilities = tf.sigmoid(raw_logits)
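        # Note: sigmoid_cross_entropy_with_logits expects the raw logits, not
        # self.probabilities; the sigmoid above is only for the exported probabilities.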
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

        #Calculate gradients and propagate
        #Aggregation method 2 is really important for rnn per the tensorflow issues list
        tvars = tf.trainable_variables()
        self.lr = tf.Variable(0.0, trainable=False)  #Assign to overwrite
        optimizer = tf.train.AdamOptimizer()
        grads, _vars = zip(*optimizer.compute_gradients(
            self.cost, tvars, aggregation_method=2))
        grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                                       config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(grads, _vars))
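
# Aside: a small shape check of the split/squeeze idiom used in the example
# above to turn the embedded batch [batch_size, sent_len, embed_size] into the
# list of sent_len tensors of [batch_size, embed_size] expected by
# rnn.bidirectional_rnn. The concrete sizes are illustrative only.
import tensorflow as tf

demo = tf.zeros([5, 3, 8])  # [batch_size=5, sent_len=3, embed_size=8]
pieces = [tf.squeeze(piece, [1]) for piece in tf.split(1, 3, demo)]
# len(pieces) == 3 and each pieces[i] has static shape [5, 8]
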
Example n. 44
    def __init__(self, config):
        sent_len = self.sent_len = config.sent_len
        word_len = config.word_len
        batch_size = config.batch_size
        vocab_size = config.vocab_size
        embed_size = config.embed_size
        keep_prob1 = config.keep_prob1
        keep_prob2 = config.keep_prob2
        num_layers1 = config.num_layers1
        num_layers2 = config.num_layers2
        state_size1 = config.state_size1
        state_size2 = config.state_size2

        self.input_data = tf.placeholder(tf.int32,
                                         [batch_size * sent_len, word_len])
        self.lengths = tf.placeholder(tf.int64, [batch_size])
        self.wordlengths = tf.placeholder(tf.int64, [batch_size * sent_len])
        self.targets = tf.placeholder(tf.float32, [batch_size, 1])

        # Get embedding layer which requires CPU
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, embed_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        #LSTM 1 -> Encode the characters of every tok into a fixed dense representation
        with tf.variable_scope("rnn1", reuse=None):
            lstm_cell_1 = rnn_cell.LSTMCell(state_size1, input_size=embed_size)
            lstm_back_cell_1 = rnn_cell.LSTMCell(state_size1,
                                                 input_size=embed_size)
            if keep_prob1 < 1:
                #Dropout only on the inputs for rnn1, so we don't apply dropout twice between rnn1 and rnn2
                lstm_cell_1 = rnn_cell.DropoutWrapper(
                    lstm_cell_1, input_keep_prob=keep_prob1)
                lstm_back_cell_1 = rnn_cell.DropoutWrapper(
                    lstm_back_cell_1, input_keep_prob=keep_prob1)

            cell_1 = rnn_cell.MultiRNNCell([lstm_cell_1] * num_layers1)
            backcell_1 = rnn_cell.MultiRNNCell([lstm_back_cell_1] *
                                               num_layers1)

            rnn_splits = [
                tf.squeeze(input_, [1])
                for input_ in tf.split(1, word_len, inputs)
            ]

            # Run the bidirectional rnn
            outputs1, last_fw_state1, last_bw_state1 = rnn.bidirectional_rnn(
                cell_1,
                backcell_1,
                rnn_splits,
                sequence_length=self.wordlengths,
                dtype=tf.float32)

        #tok_embeds = outputs1[-1]
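        # With a single layer (num_layers1 == 1), each of last_fw_state1 and
        # last_bw_state1 concatenates the LSTM cell state c and hidden state h
        # ([batch_size*sent_len, state_size1*2]), so the concat below has width
        # state_size1*4, matching the input_size declared for rnn2.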
        tok_embeds = tf.concat(1, [last_fw_state1, last_bw_state1])

        with tf.variable_scope("rnn2", reuse=None):
            lstm_cell_2 = rnn_cell.LSTMCell(state_size2,
                                            input_size=state_size1 * 4)
            lstm_back_cell_2 = rnn_cell.LSTMCell(state_size2,
                                                 input_size=state_size1 * 4)
            # Add dropout. NOTE: this applies to both the input and the output of rnn2.
            # The input here is the token embedding produced by rnn1, so this also
            # applies dropout to rnn1's output.
            if keep_prob2 < 1:
                lstm_cell_2 = rnn_cell.DropoutWrapper(
                    lstm_cell_2,
                    input_keep_prob=keep_prob2,
                    output_keep_prob=keep_prob2)
                lstm_back_cell_2 = rnn_cell.DropoutWrapper(
                    lstm_back_cell_2,
                    input_keep_prob=keep_prob2,
                    output_keep_prob=keep_prob2)

            cell_2 = rnn_cell.MultiRNNCell([lstm_cell_2] * num_layers2)
            backcell_2 = rnn_cell.MultiRNNCell([lstm_back_cell_2] *
                                               num_layers2)

            # The rnn synthesis of the tokens is size [batch_size*sent_len, state_size1*4];
            # we want it to be a list of sent_len tensors of [batch_size, state_size1*4].
            # We partition as [0,1,2,...n,0,1,2,...n,...]
            rnn_inputs2 = tf.dynamic_partition(
                tok_embeds,
                list(range(sent_len)) * batch_size, sent_len)

            #Sent level rnn
            outputs2, last_fw_state2, last_bw_state2 = rnn.bidirectional_rnn(
                cell_2,
                backcell_2,
                rnn_inputs2,
                sequence_length=self.lengths,
                dtype=tf.float32)
            #sent_embed = tf.reshape(tf.concat(1, [last_fw_state2, last_bw_state2]), [batch_size, state_size2*4])
            sent_embed = tf.concat(1, [last_fw_state2, last_bw_state2])

        with tf.variable_scope("linear", reuse=None):
            w = tf.get_variable("w", [state_size2 * 4, 1])
            b = tf.get_variable("b", [1])
            raw_logits = tf.matmul(sent_embed, w) + b
        self.probabilities = tf.sigmoid(raw_logits)
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(raw_logits, self.targets))

        #Calculate gradients and propagate
        #Aggregation method 2 is really important for rnn per the tensorflow issues list
        tvars = tf.trainable_variables()
        self.lr = tf.Variable(0.0, trainable=False)  #Assign to overwrite
        optimizer = tf.train.AdamOptimizer()
        grads, _vars = zip(*optimizer.compute_gradients(
            self.cost, tvars, aggregation_method=2))
        grads, self.grad_norm = tf.clip_by_global_norm(grads,
                                                       config.max_grad_norm)
        self.train_op = optimizer.apply_gradients(zip(grads, _vars))
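
# Aside: the clip_by_global_norm / apply_gradients wiring shared by the models
# above, reduced to a self-contained toy problem. The variable and the clip
# value are made up; only the pattern mirrors the training code above.
import tensorflow as tf

x = tf.Variable([3.0, 4.0])
toy_cost = tf.reduce_sum(tf.square(x))  # gradient is 2*x = [6, 8], global norm 10
opt = tf.train.AdamOptimizer()
grads, tvars = zip(*opt.compute_gradients(toy_cost))
# Rescale all gradients jointly so that their global norm is at most 5
clipped, global_norm = tf.clip_by_global_norm(grads, 5.0)
train_op = opt.apply_gradients(zip(clipped, tvars))
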
Example n. 45
    def __init__(self, sess, params, vocabs_size):
        NNModel.Model.__init__(self, vocabs_size)

        self.params = params

        self.batch_size = self.params.get("batch_size")
        self.max_length = self.params.get("max_length")
        self.size = self.params.get("size")
        self.num_layers = self.params.get("num_layers")
        # the learning rate is kept as a plain float here; the commented-out
        # tf.Variable form below would allow adjusting it during training
        # self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate = self.params.get("learning_rate")
        self.embedding_size = self.params.get("embedding_size")
        # self.global_step = tf.Variable(0, trainable=False)
        self.incorrect = [0] * self.max_length
        self.global_step = 0
        self.corpus_name = self.params.get("corpus_name")

        logging.info(
            "BiRNN model created with {0} layers of {1} cells. Embedding = {2}. Vocabulary sizes = {3}, length = {4}, batch = {5}."
            .format(self.num_layers, self.size, self.embedding_size,
                    vocabs_size, self.max_length, self.batch_size))

        # forward RNN
        with tf.variable_scope('forward'):
            fcell = rnn_cell.GRUCell(self.size, input_size=self.embedding_size)
            forward_cell = fcell
            if self.num_layers > 1:
                fcell2 = rnn_cell.GRUCell(self.size)
                # stack num_layers cells in total: the input-sized cell plus
                # (num_layers - 1) hidden-sized cells
                forward_cell = rnn_cell.MultiRNNCell(
                    [fcell] + [fcell2] * (self.num_layers - 1))

        # backward RNN
        with tf.variable_scope('backward'):
            bcell = rnn_cell.GRUCell(self.size, input_size=self.embedding_size)
            backward_cell = bcell
            if self.num_layers > 1:
                bcell2 = rnn_cell.GRUCell(self.size)
                # stack num_layers cells in total, mirroring the forward stack
                backward_cell = rnn_cell.MultiRNNCell(
                    [bcell] + [bcell2] * (self.num_layers - 1))

        #seq_len = tf.fill([self.batch_size], constant(self.max_length, dtype=tf.int64))

        # self.inputs = tf.placeholder(tf.float32, shape=[self.max_length, self.batch_size, self.vocab_sizes[0]], name="inputs")
        self.inputs = [
            tf.placeholder(tf.int32, shape=[None], name="inputs{0}".format(i))
            for i in range(self.max_length)
        ]
        self.targets = [
            tf.placeholder(tf.int32, shape=[None], name="targets{0}".format(i))
            for i in range(self.max_length)
        ]

        self.sentence_lengths = tf.placeholder(tf.int64,
                                               shape=[None],
                                               name="sequence_lengths")
        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  shape=[],
                                                  name="dropout")

        self.word_embeddings = tf.Variable(
            tf.random_uniform([self.vocab_sizes[0], self.embedding_size], -1.0,
                              1.0))
        embedded_inputs = [
            tf.nn.embedding_lookup(self.word_embeddings, input_)
            for input_ in self.inputs
        ]
        dropped_embedded_inputs = [
            tf.nn.dropout(i, self.dropout_placeholder) for i in embedded_inputs
        ]  # the dropout keep probability is a real number

        weights = {
            # Hidden layer weights => 2*n_hidden because of forward + backward cells
            # 'hidden': tf.Variable(tf.random_uniform([self.vocab_sizes[0], 2 * size]), name="hidden-weight"),
            'out':
            tf.Variable(tf.random_uniform([2 * self.size,
                                           self.vocab_sizes[1]]),
                        name="out-weight")
        }
        biases = {
            # 'hidden': tf.Variable(tf.random_uniform([2 * size]), name="hidden-bias"),
            'out':
            tf.Variable(tf.random_uniform([self.vocab_sizes[1]]),
                        name="out-bias")
        }

        # hack to suppress the logging emitted during RNN creation
        logging.getLogger().setLevel(logging.CRITICAL)
        with tf.variable_scope('BiRNN-net'):
            # bidi_layer = BidirectionalRNNLayer(forward_cell, backward_cell, dropped_embedded_inputs, self.sentence_lengths)
            # with tf.variable_scope('forward'):
            #     output_fw, last_state = rnn.rnn(cell=forward_cell, inputs=dropped_embedded_inputs, dtype=tf.float32, sequence_length=self.sentence_lengths)
            #
            # with tf.variable_scope('backward'):
            #     outputs_rev_rev, last_state_rev = rnn.rnn(cell=backward_cell, inputs=rnn._reverse_seq(dropped_embedded_inputs, self.sentence_lengths), dtype=tf.float32,
            #                                               sequence_length=self.sentence_lengths)
            #     output_bw = self.rnn._reverse_seq(outputs_rev_rev, self.sentence_lengths)
            #
            # outputs = [array_ops.concat(1, [fw, bw]) for fw, bw in zip(output_fw, output_bw)]
            outputs = rnn.bidirectional_rnn(
                forward_cell,
                backward_cell,
                dropped_embedded_inputs,
                sequence_length=self.sentence_lengths,
                dtype=tf.float32)

        logging.getLogger().setLevel(logging.INFO)

        self.out = []
        self.probs = []
        # since the switch to TF 0.8, bidirectional_rnn also returns the final
        # states of the forward and backward cells, so index [0] to get just
        # the per-step outputs
        for o in outputs[0]:
            # TODO: maybe add tf.nn.relu(matmul + bias) here?
            intermediate_out = tf.matmul(o, weights['out']) + biases['out']
            self.out.append(intermediate_out)
            self.probs.append(tf.nn.softmax(intermediate_out))

        loss = seq2seq.sequence_loss_by_example(self.out, self.targets,
                                                [tf.ones([self.batch_size])] *
                                                self.max_length,
                                                self.vocab_sizes[1])

        self.cost = tf.reduce_sum(loss) / self.batch_size

        tf.scalar_summary("Cost", self.cost)

        # minimize the averaged cost defined above (rather than the raw
        # per-example loss vector)
        self.updates = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.cost)

        self.saver = tf.train.Saver(max_to_keep=0)  # don't remove old models

        self.summaries = tf.merge_all_summaries()
        self.sum_writer = tf.train.SummaryWriter("tmp", sess.graph)

        # Initialize the variables and launch the graph

        sess.run(tf.initialize_all_variables())
        logging.info("BiRNN model initialized.")
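
# Aside: a tiny, self-contained demonstration of the "list of per-timestep
# placeholders" feeding pattern used by self.inputs / self.targets above.
# The names, sizes and data here are made up; the point is that each timestep
# placeholder is fed one [batch_size] slice of the batch.
import numpy as np
import tensorflow as tf

demo_steps, demo_batch = 4, 2
step_inputs = [
    tf.placeholder(tf.int32, shape=[None], name="demo_in{0}".format(t))
    for t in range(demo_steps)
]
summed = tf.add_n(step_inputs)  # any op that consumes all timesteps
data = np.arange(demo_steps * demo_batch, dtype=np.int32).reshape(demo_steps, demo_batch)
feed = {ph: data[t] for t, ph in enumerate(step_inputs)}
with tf.Session() as sess:
    print(sess.run(summed, feed_dict=feed))  # -> [12 16]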