Example #1
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 num_labels,
                 embed,
                 learning_rate=0.5,
                 max_gradient_norm=5.0,
                 model='LSTM'):
        #todo: implement placeholders
        self.texts = tf.placeholder(dtype=tf.string,
                                    shape=[None, None])  # shape: batch*len
        self.texts_length = tf.placeholder(dtype=tf.int32,
                                           shape=None)  # shape: batch
        self.labels = tf.placeholder(dtype=tf.int64,
                                     shape=None)  # shape: batch

        self.keep_prob = tf.placeholder(dtype=tf.float32)

        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(
            self.embed, self.index_input)  #batch*len*embed_unit

        #todo: implement unfinished networks

        if num_layers == 1:
            if model == 'LSTM':
                cell = BasicLSTMCell(num_units)
            elif model == 'RNN':
                cell = BasicRNNCell(num_units)
            elif model == 'GRU':
                cell = GRUCell(num_units)
            else:
                print("Wrong model!")
                return
            cell_dr = tf.nn.rnn_cell.DropoutWrapper(
                cell, input_keep_prob=1.0, output_keep_prob=self.keep_prob)
            outputs, states = dynamic_rnn(cell_dr,
                                          self.embed_input,
                                          self.texts_length,
                                          dtype=tf.float32,
                                          scope="rnn")
            if model == 'LSTM':
                h_state = states.h  # LSTMStateTuple is (c, h); use the hidden state, not the cell state
            else:
                h_state = states
        else:
            # build one independent cell per layer: reusing a single cell
            # object ([cell_dr] * num_layers) would tie the weights of all
            # layers together and fails in recent TF 1.x releases
            cells = []
            for _ in range(num_layers):
                if model == 'LSTM':
                    cell = BasicLSTMCell(num_units)
                elif model == 'RNN':
                    cell = BasicRNNCell(num_units)
                elif model == 'GRU':
                    cell = GRUCell(num_units)
                else:
                    print("Wrong model!")
                    return
                cells.append(tf.nn.rnn_cell.DropoutWrapper(
                    cell, input_keep_prob=1.0, output_keep_prob=self.keep_prob))
            multi_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            # derive the batch size from the input instead of hard-coding 16
            init_state = multi_cell.zero_state(tf.shape(self.texts)[0], tf.float32)
            outputs, state = tf.nn.dynamic_rnn(multi_cell,
                                               self.embed_input,
                                               self.texts_length,
                                               dtype=tf.float32,
                                               scope="rnn",
                                               initial_state=init_state,
                                               time_major=False)
            # use the final state of the top layer; with sequence_length set,
            # outputs[:, -1, :] is zero for sequences shorter than the batch max
            top_state = state[num_layers - 1]
            h_state = top_state.h if model == 'LSTM' else top_state

        logits = tf.layers.dense(h_state, num_labels)

        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                        dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int32),
                                      name='accuracy')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
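
The constructor above only builds the graph; a training script still has to feed the placeholders and run the update op. A minimal sketch under the TF 1.x session API, where model is an instance of this class and batch_texts, batch_lengths, batch_labels are illustrative names for a padded batch prepared elsewhere:

# Hypothetical training step for the model defined in Example #1.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.texts: batch_texts,            # padded string tokens, batch*len
        model.texts_length: batch_lengths,   # true length of each sequence
        model.labels: batch_labels,          # gold label ids
        model.keep_prob: 0.8,                # dropout keep probability during training
    }
    _, loss, acc = sess.run([model.update, model.loss, model.accuracy], feed)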
Example #2
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 num_labels,
                 embed,
                 learning_rate=0.5,
                 max_gradient_norm=5.0):
        #todo: implement placeholders
        self.texts = tf.placeholder(tf.string, [None, None],
                                    name="texts")  # shape: batch*len
        self.texts_length = tf.placeholder(tf.int64, [None],
                                           name="texts_length")  # shape: batch
        self.labels = tf.placeholder(tf.int64, [None],
                                     name="labels")  # shape: batch

        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        learning_rate_decay_factor = 0.9
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(
            self.embed, self.index_input)  #batch*len*embed_unit

        model = 'lstm'

        if num_layers == 1:
            if (model == 'rnn'):
                cell = BasicRNNCell(num_units)
            elif (model == 'gru'):
                cell = GRUCell(num_units)
            elif (model == 'lstm'):
                cell = BasicLSTMCell(num_units)

            cell_do = tf.nn.rnn_cell.DropoutWrapper(
                cell, input_keep_prob=1.0, output_keep_prob=FLAGS.keep_prob)
            outputs, states = dynamic_rnn(cell_do,
                                          self.embed_input,
                                          self.texts_length,
                                          dtype=tf.float32,
                                          scope="rnn")
            #todo: implement unfinished networks
            outputs_flat = tf.reduce_mean(outputs, 1)
            if (model == 'lstm'):
                states = states.h  # use the hidden state of the LSTMStateTuple
            # W_f = weight_variable([tf.app.flags.FLAGS.units, 5])
            # b_f = bias_variable([5])
            # logits = tf.matmul(outputs_flat, W_f) + b_f
            # fc_layer = tf.layers.dense(inputs = states, units = 32, activation = tf.nn.relu)
            logits = tf.layers.dense(inputs=states, units=num_labels, activation=None)

        else:
            self.reverse_texts = tf.placeholder(
                tf.string, [None, None],
                name="reverse_texts")  # shape: batch*len
            self.index_reverse_input = self.symbol2index.lookup(
                self.reverse_texts)
            self.embed_reverse_input = tf.nn.embedding_lookup(
                self.embed, self.index_reverse_input)  #batch*len*embed_unit

            if (model == 'rnn'):
                cell1 = BasicRNNCell(num_units)
                cell2 = BasicRNNCell(num_units)
            elif (model == 'gru'):
                cell1 = GRUCell(num_units)
                cell2 = GRUCell(num_units)
            elif (model == 'lstm'):
                cell1 = BasicLSTMCell(num_units)
                cell2 = BasicLSTMCell(num_units)

            cell1_do = tf.nn.rnn_cell.DropoutWrapper(
                cell1, input_keep_prob=1.0, output_keep_prob=FLAGS.keep_prob)
            cell2_do = tf.nn.rnn_cell.DropoutWrapper(
                cell2, input_keep_prob=1.0, output_keep_prob=FLAGS.keep_prob)

            outputs1, states1 = dynamic_rnn(cell1_do,
                                            self.embed_input,
                                            self.texts_length,
                                            dtype=tf.float32,
                                            scope="rnn")
            # a separate variable scope so the reverse-direction cell gets its
            # own weights; reusing scope="rnn" would raise a variable conflict
            outputs2, states2 = dynamic_rnn(cell2_do,
                                            self.embed_reverse_input,
                                            self.texts_length,
                                            dtype=tf.float32,
                                            scope="rnn_reverse")

            if (model == 'lstm'):
                # sum the forward and reverse-direction hidden states
                states = states1.h + states2.h
            else:
                states = states1 + states2

            # fc_layer = tf.layers.dense(inputs = states, units = 32, activation = tf.nn.relu)
            logits = tf.layers.dense(inputs=states, units=num_labels, activation=None)

        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                        dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int32),
                                      name='accuracy')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        # opt = tf.train.AdamOptimizer(self.learning_rate)

        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
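
The multi-layer branch of Example #2 expects an extra reverse_texts feed holding each sentence with its real tokens reversed. A hedged sketch of how that feed might be built in NumPy; the pad token name and batch_texts (a list of token lists) are assumptions, and padding stays at the end of each row:

import numpy as np

def reverse_padded(batch_texts, lengths, pad='_PAD'):
    # pad to the batch maximum and reverse only the first `n` real tokens of each row
    max_len = max(lengths)
    out = np.full((len(batch_texts), max_len), pad, dtype=object)
    for i, (row, n) in enumerate(zip(batch_texts, lengths)):
        out[i, :n] = row[:n][::-1]
    return out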
Example #3
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 num_labels,
                 embed,
                 learning_rate=0.001,
                 max_gradient_norm=5.0,
                 learning_rate_decay_factor=0.1):
        #todo: implement placeholders
        # PROBLEMS REMAIN
        self.texts = tf.placeholder(dtype=tf.string,
                                    shape=[None, None])  # shape: batch*len
        self.texts_length = tf.placeholder(dtype=tf.int32,
                                           shape=[None])  # shape: batch
        self.labels = tf.placeholder(dtype=tf.int32,
                                     shape=[None])  # shape: batch
        self.output_keep_prob = tf.placeholder(dtype=tf.float32, shape=[])

        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_update_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.embed_input = tf.nn.embedding_lookup(
            self.embed, self.index_input)  #batch*len*embed_unit

        if num_layers == 1:
            # basic rnn
            # cell = BasicRNNCell(num_units)
            # outputs, states = dynamic_rnn(cell, self.embed_input, self.texts_length, dtype=tf.float32, scope="rnn")

            # gru
            # cell = GRUCell(num_units)
            # outputs, states = dynamic_rnn(cell, self.embed_input, self.texts_length, dtype=tf.float32, scope="rnn")

            # lstm
            # cell = BasicLSTMCell(num_units)
            # outputs, states = dynamic_rnn(cell, self.embed_input, self.texts_length, dtype=tf.float32, scope="rnn")
            # states = states[1]

            # final model
            cell = tf.nn.rnn_cell.DropoutWrapper(
                BasicLSTMCell(num_units),
                output_keep_prob=self.output_keep_prob)
            cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                BasicLSTMCell(num_units),
                output_keep_prob=self.output_keep_prob)
            outputs, states = bidirectional_dynamic_rnn(cell,
                                                        cell_bw,
                                                        self.embed_input,
                                                        self.texts_length,
                                                        dtype=tf.float32,
                                                        scope="rnn")
            states = states[0][1] + states[1][1]
        else:
            cells = []
            cells_bw = []
            for _ in range(num_layers):
                cell = tf.nn.rnn_cell.DropoutWrapper(
                    GRUCell(num_units), output_keep_prob=self.output_keep_prob)
                cells.append(cell)
                cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                    GRUCell(num_units), output_keep_prob=self.output_keep_prob)
                cells_bw.append(cell_bw)
            cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
            cell_bw = tf.contrib.rnn.MultiRNNCell(cells_bw,
                                                  state_is_tuple=True)
            outputs, states = bidirectional_dynamic_rnn(cell,
                                                        cell_bw,
                                                        self.embed_input,
                                                        self.texts_length,
                                                        dtype=tf.float32,
                                                        scope="stacked_rnn")
            states = states[0][num_layers - 1] + states[1][num_layers - 1]

        #todo: implement unfinished networks
        self.w1 = tf.Variable(
            tf.random_normal(shape=[num_units, num_labels],
                             stddev=tf.sqrt(2.0 / (num_units + num_labels))))
        self.b1 = tf.Variable(tf.constant(0.0, shape=[num_labels]))
        logits = tf.matmul(states, self.w1) + self.b1

        self.loss = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                           logits=logits),
            name='loss')
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                        dtype=tf.float32)
        self.predict_labels = tf.argmax(logits,
                                        1,
                                        'predict_labels',
                                        output_type=tf.int32)
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, self.predict_labels), tf.int32),
                                      name='accuracy')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
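
Every example builds a MutableHashTable (symbol2index) that starts empty; the training script has to insert the vocabulary before the first lookup. A sketch under the assumption that model is an instance of one of these classes and vocab is a Python list of tokens whose indices match the embedding rows:

# Populate the string-to-index table once, before training starts.
insert_op = model.symbol2index.insert(
    tf.constant(vocab, dtype=tf.string),
    tf.constant(list(range(len(vocab))), dtype=tf.int64))
with tf.Session() as sess:
    sess.run(insert_op)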
Example #4
    def __init__(self,
            num_symbols,
            num_embed_units,
            num_units,
            num_layers,
            num_labels,
            embed,
            learning_rate=0.5,
            max_gradient_norm=5.0,
            keep_prob=1.,
            weight_decay=1e-10,
            RNN_type="BasicRNN"):
        #todo: implement placeholders
        self.texts = tf.placeholder(dtype = tf.string, shape = [None, None])
        self.texts_length = tf.placeholder(dtype = tf.int32, shape = [None])
        self.labels = tf.placeholder(dtype = tf.int64, shape = [None])
        '''
        self.texts = tf.placeholder()  # shape: batch*len
        self.texts_length = tf.placeholder()  # shape: batch
        self.labels = tf.placeholder()  # shape: batch
        '''
        
        self.symbol2index = MutableHashTable(
                key_dtype=tf.string,
                value_dtype=tf.int64,
                default_value=UNK_ID,
                shared_name="in_table",
                name="in_table",
                checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
        self.weight_decay = tf.Variable(float(weight_decay), trainable=False, dtype=tf.float32)
        self.keep_prob = tf.Variable(float(keep_prob), trainable=False, dtype=tf.float32)

        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)


        self.index_input = self.symbol2index.lookup(self.texts)   # batch*len
        
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)
        
        
        self.embed_input = tf.nn.embedding_lookup(self.embed, self.index_input) #batch*len*embed_unit

        # bi-LSTM
        with tf.variable_scope("forward_cell"):
            #fw_cell = tf.contrib.rnn.GRUCell(num_units)
            if RNN_type == "LSTM":
                fw_cell = BasicLSTMCell(num_units)
            else:
                fw_cell = GRUCell(num_units)
        with tf.variable_scope("backward_cell"):
            #bw_cell = tf.contrib.rnn.GRUCell(num_units)
            if RNN_type == "LSTM":
                bw_cell = BasicLSTMCell(num_units)
            else:
                bw_cell = GRUCell(num_units)

        '''
        if num_layers == 1:
            if RNN_type == "BasicRNN":
                cell = BasicRNNCell(num_units)
            elif RNN_type == "GRU":
                cell = GRUCell(num_units)
            elif RNN_type == "LSTM":
                cell = BasicLSTMCell(num_units)
        outputs, states = dynamic_rnn(cell, self.embed_input, self.texts_length, dtype=tf.float32, scope="rnn")
        '''

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, self.embed_input, self.texts_length,
            dtype=tf.float32, scope="bi_lstm")
        #print("***state: ", states)

        #self.y0 = tf.reduce_max(outputs, axis=1)
        #self.y0 = tf.reduce_max(outputs[0] + outputs[1], axis=1)
        #self.y0 = tf.reduce_sum(states, axis=0)
        if RNN_type == "LSTM":
            # sum the final hidden states (h) of the forward and backward cells
            self.y0 = states[0].h + states[1].h
        else:
            # GRU states are plain tensors, one per direction
            self.y0 = states[0] + states[1]
        #print("****** y0:", self.y0)
        self.y0_dp = tf.nn.dropout(self.y0, keep_prob=self.keep_prob)

        self.y1 = tf.layers.dense(inputs=self.y0_dp, units=128, activation=tf.nn.sigmoid)  # unused by the final logits
        self.y2 = tf.layers.dense(inputs=self.y0_dp, units=num_labels)
        logits = self.y2

        '''
        self.W1 = tf.Variable(tf.truncated_normal(stddev=.1, shape=[num_units, 128]))
        self.b1 = tf.Variable(tf.constant(.1, shape=[128]))
        self.u1 = tf.matmul(self.y0_dp, self.W1) + self.b1
        self.y1 = tf.nn.sigmoid(self.u1)

        self.W2 = tf.Variable(tf.truncated_normal(stddev=.1, shape=[128, 5]))
        self.b2 = tf.Variable(tf.constant(.1, shape=[5]))
        self.u2 = tf.matmul(self.y1, self.W2) + self.b2
        '''

        # logits = tf.layers.dense(inputs=self.y1, units=5)
        # logits = self.u2

        #todo: implement unfinished networks

	with tf.name_scope("l2_loss"):
		vars   = tf.trainable_variables() 
		self.lossL2 = tf.add_n([ tf.nn.l2_loss(v) for v in vars ]) * self.weight_decay

        self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss') + self.lossL2
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, predict_labels), tf.int32), name='accuracy')

        self.params = tf.trainable_variables()
            
        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        #opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.train_op = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)
        #self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step, var_list=self.params)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)

        self.merged_summary_op = tf.summary.merge_all()
        
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2, 
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
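
All of these constructors end by merging scalar and histogram summaries into merged_summary_op. A short sketch of how a training loop might write them to disk for TensorBoard; the log directory is illustrative, and sess, model, and feed are assumed to come from the surrounding script:

# Write merged summaries every few steps so TensorBoard can plot loss and weights.
writer = tf.summary.FileWriter('./train_logs', sess.graph)
summary, step = sess.run([model.merged_summary_op, model.global_step], feed)
writer.add_summary(summary, global_step=step)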
Example #5
    def __init__(self,
            num_symbols,
            num_embed_units,
            num_units,
            num_layers,
            num_labels,
            embed,
            learning_rate=0.5,
            max_gradient_norm=5.0):
        #todo: implement placeholders
        self.texts = tf.placeholder(tf.string, [None, None], name='texts')  # shape: batch*len
        self.texts_length = tf.placeholder(tf.int32, [None], name='texts_length')  # shape: batch
        self.labels = tf.placeholder(tf.int32, [None], 'labels')  # shape: batch
        
        self.symbol2index = MutableHashTable(
                key_dtype=tf.string,
                value_dtype=tf.int64,
                default_value=UNK_ID,
                shared_name="in_table",
                name="in_table",
                checkpoint=True)
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)


        self.index_input = self.symbol2index.lookup(self.texts)   # batch*len
        
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)
        
        
        self.embed_input = tf.nn.embedding_lookup(self.embed, self.index_input) #batch*len*embed_unit

        
        if num_layers == 1:
            cell = BasicLSTMCell(num_units)
        else:
            # stack one LSTM cell per layer so `cell` is also defined for num_layers > 1
            cell = tf.contrib.rnn.MultiRNNCell(
                [BasicLSTMCell(num_units) for _ in range(num_layers)])

        cell_dr = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=0.5, output_keep_prob=0.5)
        outputs, states = dynamic_rnn(cell_dr, self.embed_input, self.texts_length, dtype=tf.float32, scope="rnn")
        labels = self.labels
        indices = tf.stack([tf.range(tf.shape(outputs)[0]),
                            self.texts_length - 1], axis=1)
        last_output = tf.gather_nd(outputs, indices)
        self.outputs = outputs
        self.states = states
        self.last_output = last_output
        #last_output = tf.reshape(last_output, (-1, num_units))
        logits = tf.layers.dense(last_output, num_labels)
        print(indices.shape)
        print(last_output.shape)



        self.logits = logits
        self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
        l2 = tf.cast(0.005, tf.float32) * sum(
            tf.nn.l2_loss(tf_var)
            for tf_var in tf.trainable_variables()
            if not ("noreg" in tf_var.name or "Bias" in tf_var.name))
        self.loss += l2
        mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
        predict_labels = tf.cast(tf.argmax(logits, 1, 'predict_labels'), tf.int32)
        self.predict_labels = predict_labels
        self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, predict_labels), tf.int32), name='accuracy')

        self.params = tf.trainable_variables()
            
        # calculate the gradient of parameters
        #opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        #opt = tf.train.RMSPropOptimizer(self.learning_rate)
        opt = tf.train.AdamOptimizer(self.learning_rate)
        gradients = tf.gradients(mean_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)

        tf.summary.scalar('loss/step', self.loss)
        for each in tf.trainable_variables():
            tf.summary.histogram(each.name, each)
        tf.summary.histogram('logits', logits)
        tf.summary.histogram('gradient_norm', self.gradient_norm)
        tf.summary.histogram('rnn_output', self.last_output)
        # histogram the clipped gradients themselves, not the parameters they update
        for grad, param in zip(clipped_gradients, self.params):
            if grad is not None:
                tf.summary.histogram('clipped_gradients/%s' % param.name, grad)

        self.merged_summary_op = tf.summary.merge_all()
        
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2, 
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
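
Each example also builds a tf.train.Saver but never shows it being used. A hedged sketch of checkpointing and restoring; the checkpoint directory is illustrative, and sess and model are assumed to exist in the surrounding training script:

# Save a checkpoint tagged with the current global step...
model.saver.save(sess, './checkpoints/model.ckpt', global_step=model.global_step)
# ...and restore the most recent one (for example, when resuming training).
ckpt = tf.train.latest_checkpoint('./checkpoints')
if ckpt:
    model.saver.restore(sess, ckpt)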