Beispiel #1
0
 def prediction(self):
     """Build the per-time-step softmax prediction of the recurrent network.

     An LSTM cell wrapped with output dropout (stacked ``self._num_layers``
     times when more than one layer is requested) is unrolled over
     ``self.data``. Every time step's output is then pushed through one
     shared softmax layer.

     Returns:
         The softmax output reshaped to ``[-1, max_length, num_classes]``,
         where both sizes are read from the static shape of ``self.target``.
     """
     cell = rnn_cell.DropoutWrapper(
         rnn_cell.LSTMCell(self._num_hidden),
         output_keep_prob=self.dropout)
     if self._num_layers > 1:
         cell = rnn_cell.MultiRNNCell([cell] * self._num_layers)
     activations, _ = rnn.dynamic_rnn(
         cell, self.data, dtype=tf.float32, sequence_length=self.length)
     # Static sizes of the target fix the shape of the final reshape.
     steps = int(self.target.get_shape()[1])
     classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, classes)
     # Collapse all leading axes so one matmul applies the same weights
     # to every time step.
     flat = tf.reshape(activations, [-1, self._num_hidden])
     probabilities = tf.nn.softmax(tf.matmul(flat, weight) + bias)
     return tf.reshape(probabilities, [-1, steps, classes])
 def prediction(self):
     """Build the softmax prediction from the last relevant GRU output.

     A GRU cell is unrolled over ``self.data`` with per-example lengths
     ``self.length``; ``self._last_relevant`` then picks one output per
     sequence, which is fed to a softmax layer.

     The input is read from ``self.data`` (like the length and target)
     rather than from a free ``data`` name, so the method does not depend
     on a module-level variable happening to exist.

     Returns:
         The softmax output; its class count is the static size of axis 1
         of ``self.target``.
     """
     # Recurrent network.
     output, _ = rnn.dynamic_rnn(
         rnn_cell.GRUCell(self._num_hidden),
         self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )
     last = self._last_relevant(output, self.length)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
 def prediction(self):
     """Build the softmax prediction from the final step of a stacked GRU.

     A GRU cell with output dropout is stacked ``self._num_layers`` times
     and unrolled over ``self.data``. The output at the last time index is
     fed to a softmax layer.

     The input is read from ``self.data`` (like the target and dropout)
     rather than from a free ``data`` name, so the method does not depend
     on a module-level variable happening to exist.

     Returns:
         The softmax output; its class count is the static size of axis 1
         of ``self.target``.
     """
     # Recurrent network.
     network = rnn_cell.GRUCell(self._num_hidden)
     network = rnn_cell.DropoutWrapper(
         network, output_keep_prob=self.dropout)
     network = rnn_cell.MultiRNNCell([network] * self._num_layers)
     output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
     # Select last output: swap the first two axes so that the last index
     # along axis 0 can be gathered. The index comes from the static shape,
     # so that dimension must be known at graph-construction time.
     output = tf.transpose(output, [1, 0, 2])
     last = tf.gather(output, int(output.get_shape()[0]) - 1)
     # Softmax layer.
     weight, bias = self._weight_and_bias(
         self._num_hidden, int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Beispiel #4
0
 def prediction(self):
     """Build the per-time-step softmax prediction of a stacked LSTM.

     An LSTM cell with output dropout is stacked ``self._num_layers`` times
     and unrolled over ``self.data``; one shared softmax layer is applied to
     the output of every time step.

     Returns:
         The softmax output reshaped to ``[-1, max_length, num_classes]``,
         where both sizes are read from the static shape of ``self.target``.
     """
     # Recurrent part: LSTM -> dropout on outputs -> stack of layers.
     cell = rnn_cell.DropoutWrapper(
         rnn_cell.LSTMCell(self._num_hidden),
         output_keep_prob=self.dropout)
     stacked = rnn_cell.MultiRNNCell([cell] * self._num_layers)
     activations, _ = rnn.dynamic_rnn(stacked, self.data, dtype=tf.float32)
     # Output part: sizes come from the target's static shape.
     steps = int(self.target.get_shape()[1])
     classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, classes)
     # Collapse all leading axes so the same weights hit every time step.
     flat = tf.reshape(activations, [-1, self._num_hidden])
     probabilities = tf.nn.softmax(tf.matmul(flat, weight) + bias)
     return tf.reshape(probabilities, [-1, steps, classes])
Beispiel #5
0
 def prediction(self):
     """Build the per-time-step softmax prediction of a stacked GRU.

     A GRU cell with output dropout is stacked ``self._num_layers`` times
     and unrolled over ``self.data``; one shared softmax layer is applied to
     the output of every time step.

     The input is read from ``self.data`` (like the target and dropout)
     rather than from a free ``data`` name, so the method does not depend
     on a module-level variable happening to exist.

     Returns:
         The softmax output reshaped to ``[-1, max_length, num_classes]``,
         where both sizes are read from the static shape of ``self.target``.
     """
     # Recurrent network.
     network = rnn_cell.GRUCell(self._num_hidden)
     network = rnn_cell.DropoutWrapper(
         network, output_keep_prob=self.dropout)
     network = rnn_cell.MultiRNNCell([network] * self._num_layers)
     output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
     # Softmax layer.
     max_length = int(self.target.get_shape()[1])
     num_classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
     # Flatten to apply same weights to all time steps.
     output = tf.reshape(output, [-1, self._num_hidden])
     prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
     prediction = tf.reshape(prediction, [-1, max_length, num_classes])
     return prediction
 def prediction(self):
     """Build the softmax prediction from the last relevant GRU output.

     The result is meant to be consumed by the loss computations (e.g. a
     cross-entropy cost or an error measure).

     The input is read from ``self.data`` (like the length and target)
     rather than from a free ``data`` name, so the method does not depend
     on a module-level variable happening to exist.

     Returns:
         The softmax output; its class count is the static size of axis 1
         of ``self.target``.
     """
     # Recurrent network.
     output, _ = rnn.dynamic_rnn(
         rnn_cell.GRUCell(self._num_hidden),
         self.data,
         dtype=tf.float32,
         sequence_length=self.length,
     )
     # When a sequence is passed in for prediction, the dynamic_rnn output
     # has to be reduced with _last_relevant to one output per sequence.
     last = self._last_relevant(output, self.length)
     # Softmax layer.
     weight, bias = self._weight_and_bias(self._num_hidden,
                                          int(self.target.get_shape()[1]))
     prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
     return prediction
Beispiel #7
0
 def prediction(self):
     """Build the per-time-step softmax prediction of a single GRU layer.

     A GRU cell is unrolled over ``self.data`` with per-example lengths
     ``self.length``; one shared softmax layer is applied to the output of
     every time step.

     Returns:
         The softmax output reshaped to ``[-1, max_length, num_classes]``,
         where both sizes are read from the static shape of ``self.target``.
     """
     # Recurrent part.
     cell = rnn_cell.GRUCell(self._num_hidden)
     activations, _ = rnn.dynamic_rnn(
         cell, self.data, dtype=tf.float32, sequence_length=self.length)
     # Output part: sizes come from the target's static shape.
     steps = int(self.target.get_shape()[1])
     classes = int(self.target.get_shape()[2])
     weight, bias = self._weight_and_bias(self._num_hidden, classes)
     # Collapse all leading axes so the same weights hit every time step.
     flat = tf.reshape(activations, [-1, self._num_hidden])
     probabilities = tf.nn.softmax(tf.matmul(flat, weight) + bias)
     return tf.reshape(probabilities, [-1, steps, classes])
def create_rnn(max_steps, n_input, mem_nrow, mem_ncol):
  """Build the NTM training graph and return its handles.

  Args:
    max_steps: Not used by this function; kept for interface compatibility.
    n_input: Size of the last axis of the input and target placeholders;
      also used as the NTM cell's input and output size.
    mem_nrow: Passed to the NTM cell as ``mem_nrows``.
    mem_ncol: Passed to the NTM cell as ``mem_ncols``.

  Returns:
    A dict with the placeholders (``'x'``, ``'y'``, ``'steps'``), the loss
    (``'cost'``), the error measure (``'err'``), the training op
    (``'optimizer'``) and the prediction (``'pred'``).
  """
  # Batch size, max_steps, n_input
  x = tf.placeholder("float", [None, None, n_input])
  y = tf.placeholder("float", [None, None, n_input])
  nsteps = tf.placeholder("int32")
  ntm_cell = ntm.NTMCell(
      n_inputs=n_input,
      n_outputs=n_input,
      n_hidden=100,
      mem_nrows=mem_nrow,
      mem_ncols=mem_ncol,
      n_heads=1,
  )
  outputs, _ = rnn.dynamic_rnn(
      ntm_cell,
      x,
      dtype=tf.float32,
      sequence_length=nsteps,
  )

  # Loss measures
  cost = var_seq_loss(outputs, y, nsteps)
  err = bits_err_per_seq(outputs, y, nsteps)

  # Optimizer params as described in paper.
  opt = tf.train.RMSPropOptimizer(
    learning_rate=1e-4,
    momentum=0.9,
  )
  # Gradient clipping as described in paper.
  # compute_gradients yields a None gradient for any variable the cost does
  # not depend on; clip_by_value cannot take None, so such pairs are passed
  # through untouched.
  gvs = opt.compute_gradients(cost)
  clipped_gvs = [
      (g if g is None else tf.clip_by_value(g, -10, 10), v)
      for g, v in gvs
  ]
  optimizer = opt.apply_gradients(clipped_gvs)

  return {
    'x': x,
    'y': y,
    'steps': nsteps,
    'cost': cost,
    'err': err,
    'optimizer': optimizer,
    'pred': predict(outputs, nsteps),
  }
Beispiel #9
0
    def __init__(self, num_labels, num_layers, hidden_size, dropout,
        batch_size, learning_rate, lr_decay_factor, grad_clip,
        max_input_seq_length, max_target_seq_length, input_dim, forward_only=False):
        '''
        Acoustic rnn model, using ctc loss with lstm cells
        Inputs:
        num_labels - dimension of character input/one hot encoding
        num_layers - number of lstm layers
        hidden_size - size of hidden layers
        dropout - keep probability handed to the DropoutWrapper for both
            the cell inputs and the cell outputs
        batch_size - number of training examples fed at once
        learning_rate - learning rate parameter fed to optimizer
        lr_decay_factor - factor the learning rate is multiplied by when
            learning_rate_decay_op is run
        grad_clip - max global gradient norm (prevent exploding gradients)
        max_input_seq_length - maximum length of input vector sequence
        max_target_seq_length - maximum length of target label sequence
        input_dim - dimension of input vector
        forward_only - whether to build back prop nodes or not
        '''
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * lr_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
        self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
        self.max_input_seq_length = max_input_seq_length
        self.max_target_seq_length = max_target_seq_length
        #graph inputs (time-major: [max_input_seq_length, batch, input_dim])
        self.inputs = tf.placeholder(tf.float32,
            shape=[self.max_input_seq_length, None, input_dim],
            name="inputs")
        self.input_seq_lengths = tf.placeholder(tf.int32,
            shape=[None],
            name="input_seq_lengths")
        self.target_seq_lengths = tf.placeholder(tf.int32,
            shape=[None],
            name="target_seq_lengths")
        #graph sparse tensor inputs
        self.target_indices = tf.placeholder(tf.int64,
            shape=[None,2],
            name="target_indices")
        self.target_vals = tf.placeholder(tf.int32,
            shape=[None],
            name="target_vals")

        #define cells of acoustic model
        cell = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(hidden_size),
            input_keep_prob=self.dropout_keep_prob_lstm_input,
            output_keep_prob=self.dropout_keep_prob_lstm_output)

        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell([cell] * num_layers)

        #build input layer
        w_i = tf.get_variable("input_w", [input_dim, hidden_size])
        b_i = tf.get_variable("input_b", [hidden_size])

        #make rnn inputs
        #Only the split time axis (axis 0) is squeezed. A bare tf.squeeze
        #would also drop the batch axis whenever the batch size is 1.
        inputs = [tf.nn.xw_plus_b(tf.squeeze(i, squeeze_dims=[0]), w_i, b_i)
                  for i in tf.split(0, self.max_input_seq_length, self.inputs)]

        #set rnn init state to 0s
        initial_state = cell.zero_state(self.batch_size, tf.float32)

        #build rnn
        rnn_output, self.hidden_state = rnn.dynamic_rnn(cell, tf.pack(inputs),
            sequence_length=self.input_seq_lengths, initial_state=initial_state,
            time_major=True, parallel_iterations=100)

        #build output layer
        w_o = tf.get_variable("output_w", [hidden_size, num_labels])
        b_o = tf.get_variable("output_b", [num_labels])

        #compute logits (one tensor per time step; same squeeze rule as above)
        self.logits = [tf.nn.xw_plus_b(tf.squeeze(i, squeeze_dims=[0]), w_o, b_o)
                       for i in tf.split(0, self.max_input_seq_length, rnn_output)]
        #setup sparse tensor for input into ctc loss
        sparse_labels = tf.SparseTensor(
            indices=self.target_indices,
            values=self.target_vals,
            shape=[self.batch_size, self.max_target_seq_length])

        #compute ctc loss
        self.ctc_loss = ctc.ctc_loss(tf.pack(self.logits), sparse_labels,
            self.input_seq_lengths)
        self.mean_loss = tf.reduce_mean(self.ctc_loss)
        params = tf.trainable_variables()

        if not forward_only:
            #training op: plain gradient descent with global-norm clipping
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            gradients = tf.gradients(self.ctc_loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                grad_clip)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Beispiel #10
0
    def build_model(self):
        """Build the attention-based encoder/decoder sense classifier graph.

        Creates the input placeholders, embeds the four feature columns of
        both arguments, encodes ``arg1`` with a forward and a backward LSTM,
        decodes ``arg2`` with an attention loop function over the encoder
        outputs, and classifies the length-averaged decoder and encoder
        outputs.

        Sets ``self.sense``, ``self.arg1``, ``self.arg2``, ``self.arg1_len``,
        ``self.arg2_len``, ``self.keep_prob``, ``self.mask``, ``self.output``,
        ``self.accuracy``, ``self.loss``, ``self.optimizer`` and
        ``self.train_op``.
        """
        with tf.variable_scope('RNNTEST'):
            self.sense = tf.placeholder(tf.int32, [None])
            self.arg1 = tf.placeholder(tf.int32, [None, None, 4])
            self.arg2 = tf.placeholder(tf.int32, [None, None, 4])
            self.arg1_len = tf.placeholder(tf.int32, [None])
            self.arg2_len = tf.placeholder(tf.int32, [None])
            self.keep_prob = tf.placeholder(tf.float32)

            # One slice per feature column along the last axis (size 4).
            arg1_list = tf.split(2, 4, self.arg1)
            arg2_list = tf.split(2, 4, self.arg2)

            # Embedding tables; a table is None when its embed size is 0.
            with tf.device('/cpu:0'):
                NER_W = tf.get_variable('NER_embed', [
                    self.data_loader.NER_vocab_size, self.NER_embed_size
                ]) if self.NER_embed_size > 0 else None
                lemma_W = tf.get_variable('lemma_embed', [
                    self.data_loader.lemma_vocab_size, self.lemma_embed_size
                ]) if self.lemma_embed_size > 0 else None
                if self.use_pre_trained_embedding:
                    word_W = tf.get_variable(
                        'word_embed',
                        initializer=tf.convert_to_tensor(
                            self.data_loader.pre_trained_word_embeddings,
                            dtype=tf.float32)
                    ) if self.word_embed_size > 0 else None
                else:
                    word_W = tf.get_variable(
                        'word_embed',
                        shape=[
                            self.data_loader.word_vocab_size,
                            self.word_embed_size
                        ]) if self.word_embed_size > 0 else None
                POS_W = tf.get_variable('POS_embed', [
                    self.data_loader.POS_vocab_size, self.POS_embed_size
                ]) if self.POS_embed_size > 0 else None
            arg1_embed_list = []
            arg2_embed_list = []
            # The table order here fixes which feature column (index along
            # the last input axis) is looked up in which table.
            for idx, W in enumerate([NER_W, lemma_W, word_W, POS_W]):
                if W is not None:
                    arg1_embed_list.append(
                        tf.nn.embedding_lookup(W,
                                               tf.squeeze(arg1_list[idx],
                                                          [2])))
                    arg2_embed_list.append(
                        tf.nn.embedding_lookup(W,
                                               tf.squeeze(arg2_list[idx],
                                                          [2])))
            arg1 = tf.nn.dropout(tf.concat(2, arg1_embed_list), self.keep_prob)
            arg2 = tf.nn.dropout(tf.concat(2, arg2_embed_list), self.keep_prob)

            encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
            decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

            with tf.variable_scope('forward_encoder'):
                forward_encoder_outputs, forward_encoder_state = rnn.dynamic_rnn(
                    encoder_lstm_unit, arg1, self.arg1_len, dtype=tf.float32)
            with tf.variable_scope('backward_encoder'):
                # sequence_length is passed here as well, so the backward
                # pass stops at each sequence's real length. Without it the
                # cell keeps running over the padding that follows the
                # reversed tokens, which leaks into the final state and into
                # the padded output positions summed further down.
                backward_encoder_outputs, backward_encoder_state = rnn.dynamic_rnn(
                    encoder_lstm_unit,
                    tf.reverse_sequence(arg1, tf.cast(self.arg1_len, tf.int64),
                                        1),
                    sequence_length=self.arg1_len,
                    dtype=tf.float32)
            encoder_outputs = tf.concat(2, [
                forward_encoder_outputs,
                tf.reverse_sequence(backward_encoder_outputs,
                                    tf.cast(self.arg1_len, tf.int64), 1)
            ])
            encoder_state = tf.concat(
                1, [forward_encoder_state, backward_encoder_state])

            source = tf.expand_dims(
                encoder_outputs,
                2)  #batch_size x source_len x 1 x source_depth(2*encoder_size)
            attention_W = tf.get_variable(
                'attention_W',
                [1, 1, 2 * self.encoder_size, self.attention_judge_size])
            attention_V = tf.get_variable('attention_V',
                                          [self.attention_judge_size])
            # A 1x1 convolution applies attention_W to every source position.
            WxH = tf.nn.conv2d(
                source, attention_W, [1, 1, 1, 1],
                'SAME')  #batch_size x source_len x 1 x attention
            self.mask = tf.placeholder(tf.float32, [None, None])

            def attention(input_t, output_t_minus_1, time):
                """Mix the current decoder input with an attention read.

                Scores every source position against the previous decoder
                output, normalizes the masked exponentials over the source
                axis, and returns a tanh projection of the weighted encoder
                sum together with ``input_t``. ``time`` is not used.
                """
                with tf.variable_scope('attention'):
                    VxS = tf.reshape(
                        rnn_cell.linear(output_t_minus_1,
                                        self.attention_judge_size, True),
                        [-1, 1, 1, self.attention_judge_size
                         ])  #batch_size x 1 x 1 x attention
                _exp = tf.exp(
                    tf.reduce_sum(attention_V * tf.tanh(WxH + VxS),
                                  [3]))  #batch_size x source_len x 1
                # Zero out positions excluded by the mask before normalizing.
                _exp = _exp * tf.expand_dims(self.mask, -1)
                attention_weight = _exp / tf.reduce_sum(_exp, [1],
                                                        keep_dims=True)
                attention_t = tf.reduce_sum(encoder_outputs * attention_weight,
                                            [1])
                feed_in_t = tf.tanh(
                    rnn_cell.linear([attention_t, input_t],
                                    self.embedding_size, True))
                return feed_in_t

            with tf.variable_scope('decoder'):
                decoder_outputs, decoder_state = dynamic_rnn_decoder(
                    arg2,
                    decoder_lstm_unit,
                    initial_state=encoder_state,
                    sequence_length=self.arg2_len,
                    loop_function=attention)
            # Length-normalized sums of decoder and encoder outputs form the
            # feature vector for the classifier.
            judge = tf.concat(1, [
                tf.reduce_sum(decoder_outputs, [1]) /
                tf.expand_dims(tf.cast(self.arg2_len, tf.float32), -1),
                tf.reduce_sum(encoder_outputs, [1]) /
                tf.expand_dims(tf.cast(self.arg1_len, tf.float32), -1)
            ])
            unscaled_log_distribution = rnn_cell.linear(
                judge, self.data_loader.sense_vocab_size, True)
            self.output = tf.cast(tf.argmax(unscaled_log_distribution, 1),
                                  tf.int32)
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(self.output, self.sense), tf.float32))

            #max-margin method
            #self._MM = tf.placeholder(tf.int32,[None])
            #margin = tf.sub(tf.reduce_max(unscaled_log_distribution,[1]),tf.gather(tf.reshape(unscaled_log_distribution,[-1]),self._MM))
            #self.loss = tf.reduce_mean(margin)

            #maximum likelihood method
            self.loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    unscaled_log_distribution, self.sense))

            self.optimizer = tf.train.AdagradOptimizer(self.lr)
            self.train_op = self.optimizer.minimize(self.loss)
Beispiel #11
0
    def __init__(self, session, num_labels, num_layers, hidden_size, dropout,
                 batch_size, learning_rate, lr_decay_factor, grad_clip,
                 max_input_seq_length, max_target_seq_length, input_dim,
                 forward_only=False, tensorboard_dir=None, tb_run_name=None):
        """
        Acoustic rnn model, using ctc loss with lstm cells
        Inputs:
        session - tensorflow session (its graph is handed to the summary writer)
        num_labels - dimension of character input/one hot encoding
        num_layers - number of lstm layers
        hidden_size - size of hidden layers
        dropout - keep probability handed to the DropoutWrapper for both the
            cell inputs and the cell outputs (only applied when not forward_only)
        batch_size - number of training examples fed at once
        learning_rate - learning rate parameter fed to optimizer
        lr_decay_factor - decay factor of the learning rate
        grad_clip - max global gradient norm (prevent exploding gradients)
        max_input_seq_length - maximum length of input vector sequence
        max_target_seq_length - maximum length of output vector sequence
        input_dim - dimension of input vector
        forward_only - whether to build back prop nodes or not
        tensorboard_dir - path to tensorboard file (None if not activated)
        tb_run_name - sub-directory name for this run's summaries; a
            timestamp is used when None
        """
        # Define GraphKeys for TensorBoard
        # Two separate key objects so training and test summaries can be
        # merged independently further down.
        graphkey_training = tf.GraphKeys()
        graphkey_test = tf.GraphKeys()

        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
        tf.scalar_summary('Learning rate', self.learning_rate, collections=[graphkey_training, graphkey_test])
        self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * lr_decay_factor)
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
        self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
        self.max_input_seq_length = max_input_seq_length
        self.max_target_seq_length = max_target_seq_length
        self.tensorboard_dir = tensorboard_dir

        # Initialize data pipes and audio_processor to None
        self.train_conn = None
        self.test_conn = None
        self.audio_processor = None

        # graph inputs (time-major: [max_input_seq_length, batch, input_dim])
        self.inputs = tf.placeholder(tf.float32,
                                     shape=[self.max_input_seq_length, None, input_dim],
                                     name="inputs")
        # We could take an int16 for less memory consumption but CTC need an int32
        self.input_seq_lengths = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name="input_seq_lengths")
        # Take an int16 for less memory consumption
        # NOTE(review): a signed int16 tops out at 32767 (not 65535), so
        # max_target_seq_length has to stay at or below that.
        self.target_seq_lengths = tf.placeholder(tf.int16,
                                                 shape=[None],
                                                 name="target_seq_lengths")

        # Define cells of acoustic model
        cell = rnn_cell.BasicLSTMCell(hidden_size, state_is_tuple=True)
        if not forward_only:
            # If we are in training then add a dropoutWrapper to the cells
            cell = rnn_cell.DropoutWrapper(cell, input_keep_prob=self.dropout_keep_prob_lstm_input,
                                           output_keep_prob=self.dropout_keep_prob_lstm_output)

        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

        # build input layer
        with tf.name_scope('Input_Layer'):
            w_i = tf.Variable(tf.truncated_normal([input_dim, hidden_size], stddev=np.sqrt(2.0 / (2 * hidden_size))),
                              name="input_w")
            b_i = tf.Variable(tf.zeros([hidden_size]), name="input_b")

        # make rnn inputs
        # Split along the time axis and squeeze only that axis, so the batch
        # axis is kept even for a batch of one.
        inputs = [tf.matmul(tf.squeeze(i, squeeze_dims=[0]), w_i) + b_i
                  for i in tf.split(0, self.max_input_seq_length, self.inputs)]

        # set rnn init state to 0s
        init_state = cell.zero_state(self.batch_size, tf.float32)

        # build rnn
        with tf.name_scope('Dynamic_rnn'):
            rnn_output, self.hidden_state = rnn.dynamic_rnn(cell, tf.pack(inputs),
                                                            sequence_length=self.input_seq_lengths,
                                                            initial_state=init_state,
                                                            time_major=True, parallel_iterations=1000)

        # build output layer
        with tf.name_scope('Output_layer'):
            w_o = tf.Variable(tf.truncated_normal([hidden_size, num_labels], stddev=np.sqrt(2.0 / (2 * num_labels))),
                              name="output_w")
            b_o = tf.Variable(tf.zeros([num_labels]), name="output_b")

        # compute logits (per time step, then packed back into one tensor)
        self.logits = tf.pack([tf.matmul(tf.squeeze(i, squeeze_dims=[0]), w_o) + b_o
                               for i in tf.split(0, self.max_input_seq_length, rnn_output)])

        # compute prediction (first decoded result of the beam search, cast to int32)
        self.prediction = tf.to_int32(ctc.ctc_beam_search_decoder(self.logits, self.input_seq_lengths)[0][0])

        # Loss, optimizer and error-rate nodes are only built for training.
        if not forward_only:
            # graph sparse tensor inputs
            # We could take an int16 for less memory consumption but SparseTensor need an int64
            self.target_indices = tf.placeholder(tf.int64,
                                                 shape=[None, 2],
                                                 name="target_indices")
            # We could take an int8 for less memory consumption but CTC need an int32
            self.target_vals = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="target_vals")

            # setup sparse tensor for input into ctc loss
            sparse_labels = tf.SparseTensor(
                indices=self.target_indices,
                values=self.target_vals,
                shape=[self.batch_size, self.max_target_seq_length])

            # compute ctc loss
            self.ctc_loss = ctc.ctc_loss(self.logits, sparse_labels,
                                         self.input_seq_lengths)
            self.mean_loss = tf.reduce_mean(self.ctc_loss)
            # The same tensor is registered under both keys so it can be
            # reported separately for training and test runs.
            tf.scalar_summary('Mean loss (Training)', self.mean_loss, collections=[graphkey_training])
            tf.scalar_summary('Mean loss (Test)', self.mean_loss, collections=[graphkey_test])
            params = tf.trainable_variables()

            # Adam with gradients clipped by global norm.
            opt = tf.train.AdamOptimizer(self.learning_rate)
            gradients = tf.gradients(self.ctc_loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                             grad_clip)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

            # Accuracy
            # Total (unnormalized) edit distance divided by the number of
            # label values in the batch.
            with tf.name_scope('Accuracy'):
                errorRate = tf.reduce_sum(tf.edit_distance(self.prediction, sparse_labels, normalize=False)) / \
                           tf.to_float(tf.size(sparse_labels.values))
                tf.scalar_summary('Error Rate (Training)', errorRate, collections=[graphkey_training])
                tf.scalar_summary('Error Rate (Test)', errorRate, collections=[graphkey_test])

        # TensorBoard init
        if self.tensorboard_dir is not None:
            self.train_summaries = tf.merge_all_summaries(key=graphkey_training)
            self.test_summaries = tf.merge_all_summaries(key=graphkey_test)
            if tb_run_name is None:
                # Default run name: current local timestamp.
                run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
            else:
                run_name = tb_run_name
            self.summary_writer = tf.train.SummaryWriter(tensorboard_dir + '/' + run_name + '/', graph=session.graph)
        else:
            self.summary_writer = None

        # We need to save all variables except for the hidden_state
        # we keep it across batches but we don't need it across different runs
        # Especially when we process a one time file
        save_list = [var for var in tf.all_variables() if var.name.find('hidden_state') == -1]
        self.saver = tf.train.Saver(save_list)
def define_seq2seq_rnn_for_training(image_input_data,image_input_lengths,label_rnn_input_data,dropout_input_keep_prob,dropout_output_keep_prob):
    """Build a conv + LSTM image encoder that seeds an LSTM label decoder.

    The image goes through one 7x7 tanh convolution and is then read along
    its first spatial axis by an LSTM (scope "RNN1"). One encoder output per
    example, picked by `last_relevant`, is projected through a learned tanh
    layer into the initial state of a second LSTM (scope "RNN2") that
    consumes the label sequence.

    Args:
        image_input_data: image batch. Static dims 1 and 2 are read as width
            and height, so it is expected to be
            (n_batch_size, image_width, image_height) with both known.
        image_input_lengths: per-example lengths, passed as
            `sequence_length` to the encoder and to `last_relevant`.
        label_rnn_input_data: decoder inputs, documented by the original
            author as (n_batch_size, n_label_rnn_steps, n_classes). Dims 1
            and 2 must be statically known.
        dropout_input_keep_prob: keep probability for the inputs of both
            LSTM cells.
        dropout_output_keep_prob: keep probability for the outputs of both
            LSTM cells.

    Returns:
        A pair `(label_rnn_outputs, label_rnn_predicted_index_labels)`:
        a list holding one (n_batch_size, n_classes) score tensor per label
        step, and the per-step argmax of those scores shaped
        (n_batch_size, n_label_rnn_steps).
    """
    # Convolutional front end
    image_width = image_input_data.get_shape()[1].value
    image_height = image_input_data.get_shape()[2].value

    # Append a channel axis of size 1 so the batch can be fed to conv2d.
    image_input_data_conv = tf.reshape(image_input_data, [-1, image_width, image_height, 1])

    n_conv1_patch_size = 7
    n_conv1_channels = 32
    print("Convolutional layer 1, Patch size:",n_conv1_patch_size,"Channels:",n_conv1_channels)
    w_conv1 = tf.Variable(tf.random_normal([n_conv1_patch_size, n_conv1_patch_size, 1, n_conv1_channels]),name="w_conv1")
    b_conv1 = tf.Variable(tf.random_normal([n_conv1_channels]),name="b_conv1")

    # Stride 1 with SAME padding keeps width and height unchanged, which the
    # reshape below relies on.
    conv1 = tf.tanh(tf.nn.conv2d(image_input_data_conv, w_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1)

    # NOTE: two further 5x5, 16-channel tanh convolution layers (conv2, conv3)
    # were prototyped here and are disabled; only conv1 feeds the encoder.

    # Fold height and channels into a single feature axis; width is the time axis.
    image_rnn_inputs = tf.reshape(conv1, [-1, image_width, image_height*n_conv1_channels])

    # Define RNN architecture
    n_image_rnn_cells = 1
    n_image_rnn_hidden = 96  # hidden layer num of features
    print("Image LSTM cells:", n_image_rnn_cells, "Image LSTM hidden units:", n_image_rnn_hidden)
    n_label_rnn_cells = 1
    n_label_rnn_hidden = 96  # hidden layer num of features
    print("Label LSTM cells:", n_label_rnn_cells, "Label LSTM hidden units:", n_label_rnn_hidden)

    # Retrieve dimensions from input data. Batch sizes are read dynamically
    # (tf.shape); step and feature counts must be static.
    image_batch_size = tf.shape(image_rnn_inputs)[0]
    n_image_rnn_steps = image_rnn_inputs.get_shape()[1].value  # Timesteps = image width
    n_image_features = image_rnn_inputs.get_shape()[2].value

    label_batch_size = tf.shape(label_rnn_input_data)[0]
    n_label_rnn_steps = label_rnn_input_data.get_shape()[1].value
    n_classes = label_rnn_input_data.get_shape()[2].value

    print(n_image_rnn_steps,n_image_features)
    print(n_label_rnn_steps,n_classes)

    # Label-side projections: classes -> decoder input, decoder output -> classes.
    w_label_hidden = tf.Variable(tf.random_normal([n_classes, n_label_rnn_hidden]),name="w_label_hidden")
    b_label_hidden = tf.Variable(tf.random_normal([n_label_rnn_hidden]),name="b_label_hidden")
    w_label_out = tf.Variable(tf.random_normal([n_label_rnn_hidden, n_classes]),name="w_label_out")
    b_label_out = tf.Variable(tf.random_normal([n_classes]),name="b_label_out")

    # Image RNN (encoder)
    image_lstm_cell = rnn_cell.LSTMCell(n_image_rnn_hidden)
    image_lstm_cell = rnn_cell.DropoutWrapper(image_lstm_cell, input_keep_prob=dropout_input_keep_prob, output_keep_prob=dropout_output_keep_prob)
    if n_image_rnn_cells > 1:
        image_lstm_cell = rnn_cell.MultiRNNCell([image_lstm_cell] * n_image_rnn_cells)
    image_rnn_initial_state = image_lstm_cell.zero_state(image_batch_size, tf.float32)
    image_rnn_outputs, _ = rnn.dynamic_rnn(image_lstm_cell, image_rnn_inputs, initial_state=image_rnn_initial_state, sequence_length=image_input_lengths, scope="RNN1")
    # last_relevant is defined elsewhere in this file; presumably it picks the
    # encoder output at each example's last valid step - confirm there.
    image_rnn_output = last_relevant(image_rnn_outputs,image_input_lengths)

    # Transform input data for label RNN: go time-major, project every step
    # with the shared label weights, then split into one tensor per step.
    label_rnn_inputs = tf.transpose(label_rnn_input_data, [1, 0, 2])  # (n_label_rnn_steps, n_batch_size, n_classes)
    label_rnn_inputs = tf.reshape(label_rnn_inputs, [-1, n_classes])  # (n_label_rnn_steps*n_batch_size, n_classes)
    label_rnn_inputs = tf.matmul(label_rnn_inputs, w_label_hidden) + b_label_hidden  # (n_label_rnn_steps*n_batch_size, n_label_rnn_hidden)
    label_rnn_inputs = tf.split(0, n_label_rnn_steps, label_rnn_inputs)  # n_label_rnn_steps * (n_batch_size, n_label_rnn_hidden)

    # Label RNN (decoder)
    label_lstm_cell = rnn_cell.LSTMCell(n_label_rnn_hidden, forget_bias=0)
    label_lstm_cell = rnn_cell.DropoutWrapper(label_lstm_cell, input_keep_prob=dropout_input_keep_prob,
                                              output_keep_prob=dropout_output_keep_prob)
    if n_label_rnn_cells > 1:
        label_lstm_cell = rnn_cell.MultiRNNCell([label_lstm_cell] * n_label_rnn_cells)

    # zero_state is built only to read the static width of the decoder state;
    # the state actually used is a learned tanh projection of the encoder output.
    label_state_template = label_lstm_cell.zero_state(label_batch_size, tf.float32)
    n_label_state = label_state_template.get_shape()[1].value
    w_image2label = tf.Variable(
        tf.random_normal([image_rnn_output.get_shape()[1].value, n_label_state]))
    b_image2label = tf.Variable(tf.random_normal([n_label_state]))
    label_rnn_initial_state = tf.tanh(tf.matmul(image_rnn_output, w_image2label) + b_image2label)

    label_rnn_outputs, _ = rnn.rnn(label_lstm_cell, label_rnn_inputs,
                                   initial_state=label_rnn_initial_state, scope="RNN2")

    # Map every decoder output to class scores.
    label_rnn_outputs = [tf.matmul(lro, w_label_out) + b_label_out for lro in
                         label_rnn_outputs]  # n_label_rnn_steps * (n_batch_size,n_classes)

    label_rnn_predicted_index_labels = tf.pack(label_rnn_outputs)  # (n_label_rnn_steps,n_batch_size,n_classes)
    label_rnn_predicted_index_labels = tf.transpose(label_rnn_predicted_index_labels,
                                                    [1, 0, 2])  # (n_batch_size,n_label_rnn_steps,n_classes)
    label_rnn_predicted_index_labels = tf.argmax(label_rnn_predicted_index_labels,
                                                 2)  # (n_batch_size, n_label_rnn_steps)

    return label_rnn_outputs,label_rnn_predicted_index_labels
Beispiel #13
0
    def __init__(self,
                 num_labels,
                 num_layers,
                 hidden_size,
                 dropout,
                 batch_size,
                 learning_rate,
                 lr_decay_factor,
                 grad_clip,
                 max_input_seq_length,
                 max_target_seq_length,
                 input_dim,
                 forward_only=False):
        '''
        Acoustic rnn model, using ctc loss with lstm cells
        Inputs:
        num_labels - width of the output layer (scores produced per time step)
        num_layers - number of lstm layers
        hidden_size - size of hidden layers
        dropout - KEEP probability handed to DropoutWrapper for both the
                  inputs and the outputs of the lstm cell (1.0 = no dropout)
        batch_size - number of examples fed at once; fixes the batch size of
                     the initial rnn state and of the sparse label tensor
        learning_rate - initial learning rate fed to the optimizer
        lr_decay_factor - factor the learning rate is multiplied by each time
                          learning_rate_decay_op is run
        grad_clip - global-norm threshold used to clip the gradients
        max_input_seq_length - number of time steps of the input placeholder
        max_target_seq_length - second dimension of the sparse label tensor
        input_dim - dimension of input vector
        forward_only - if True, no gradient/update ops are built
        '''
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * lr_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        # NOTE(review): these keep probabilities are graph constants and are
        # applied even when forward_only is True - confirm that is intended.
        self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
        self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
        self.max_input_seq_length = max_input_seq_length
        self.max_target_seq_length = max_target_seq_length
        #graph inputs (time-major: [time, batch, input_dim])
        self.inputs = tf.placeholder(
            tf.float32,
            shape=[self.max_input_seq_length, None, input_dim],
            name="inputs")
        self.input_seq_lengths = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name="input_seq_lengths")
        self.target_seq_lengths = tf.placeholder(tf.int32,
                                                 shape=[None],
                                                 name="target_seq_lengths")
        #graph sparse tensor inputs
        self.target_indices = tf.placeholder(tf.int64,
                                             shape=[None, 2],
                                             name="target_indices")
        self.target_vals = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name="target_vals")

        #define cells of acoustic model
        cell = rnn_cell.DropoutWrapper(
            rnn_cell.BasicLSTMCell(hidden_size),
            input_keep_prob=self.dropout_keep_prob_lstm_input,
            output_keep_prob=self.dropout_keep_prob_lstm_output)

        if num_layers > 1:
            cell = rnn_cell.MultiRNNCell([cell] * num_layers)

        #build input layer
        w_i = tf.get_variable("input_w", [input_dim, hidden_size])
        b_i = tf.get_variable("input_b", [hidden_size])

        #make rnn inputs: one affine projection per time step.
        #Squeeze only the leading (split) axis; a bare tf.squeeze would also
        #drop the batch or feature axis whenever it happens to have size 1.
        inputs = [
            tf.nn.xw_plus_b(tf.squeeze(i, [0]), w_i, b_i)
            for i in tf.split(0, self.max_input_seq_length, self.inputs)
        ]

        #set rnn init state to 0s
        initial_state = cell.zero_state(self.batch_size, tf.float32)

        #build rnn; hidden_state is the final state returned by dynamic_rnn
        rnn_output, self.hidden_state = rnn.dynamic_rnn(
            cell,
            tf.pack(inputs),
            sequence_length=self.input_seq_lengths,
            initial_state=initial_state,
            time_major=True,
            parallel_iterations=100)

        #build output layer
        w_o = tf.get_variable("output_w", [hidden_size, num_labels])
        b_o = tf.get_variable("output_b", [num_labels])

        #compute logits, one [batch, num_labels] tensor per time step
        #(same explicit-axis squeeze as for the inputs above)
        self.logits = [
            tf.nn.xw_plus_b(tf.squeeze(i, [0]), w_o, b_o)
            for i in tf.split(0, self.max_input_seq_length, rnn_output)
        ]
        #setup sparse tensor for input into ctc loss
        sparse_labels = tf.SparseTensor(
            indices=self.target_indices,
            values=self.target_vals,
            shape=[self.batch_size, self.max_target_seq_length])

        #compute ctc loss
        self.ctc_loss = ctc.ctc_loss(tf.pack(self.logits), sparse_labels,
                                     self.input_seq_lengths)
        self.mean_loss = tf.reduce_mean(self.ctc_loss)

        if not forward_only:
            #training ops: plain SGD on globally clipped gradients
            params = tf.trainable_variables()
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            gradients = tf.gradients(self.ctc_loss, params)
            clipped_gradients, _ = tf.clip_by_global_norm(
                gradients, grad_clip)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
def define_seq2seq_rnn_for_training(image_rnn_input_data,image_rnn_input_lengths,label_rnn_input_data,dropout_input_keep_prob,dropout_output_keep_prob):
    """Build an LSTM image encoder that seeds an LSTM label decoder.

    The image sequence is read by an LSTM (scope "RNN1"). One encoder output
    per example, picked by `last_relevant`, is projected through a learned
    tanh layer into the initial state of a second LSTM (scope "RNN2") that
    consumes the label sequence.

    Args:
        image_rnn_input_data: encoder inputs, documented by the original
            author as (n_batch_size, n_steps, n_features). Dims 1 and 2 must
            be statically known.
        image_rnn_input_lengths: per-example lengths, passed as
            `sequence_length` to the encoder and to `last_relevant`.
        label_rnn_input_data: decoder inputs, documented by the original
            author as (n_batch_size, n_label_rnn_steps, n_classes). Dims 1
            and 2 must be statically known.
        dropout_input_keep_prob: keep probability for the inputs of both
            LSTM cells.
        dropout_output_keep_prob: keep probability for the outputs of both
            LSTM cells.

    Returns:
        A pair `(label_rnn_outputs, label_rnn_predicted_index_labels)`:
        a list holding one (n_batch_size, n_classes) score tensor per label
        step, and the per-step argmax of those scores shaped
        (n_batch_size, n_label_rnn_steps).
    """
    # Define RNN architecture
    n_image_rnn_cells = 1
    n_image_rnn_hidden = 96  # hidden layer num of features
    print("Image LSTM cells:", n_image_rnn_cells, "Image LSTM hidden units:", n_image_rnn_hidden)
    n_label_rnn_cells = 1
    n_label_rnn_hidden = 96  # hidden layer num of features
    print("Label LSTM cells:", n_label_rnn_cells, "Label LSTM hidden units:", n_label_rnn_hidden)

    # Retrieve dimensions from input data. Batch sizes are read dynamically
    # (tf.shape); step and feature counts must be static.
    image_batch_size = tf.shape(image_rnn_input_data)[0]
    n_image_rnn_steps = image_rnn_input_data.get_shape()[1].value  # Timesteps = image width
    n_image_features = image_rnn_input_data.get_shape()[2].value

    label_batch_size = tf.shape(label_rnn_input_data)[0]
    n_label_rnn_steps = label_rnn_input_data.get_shape()[1].value
    n_classes = label_rnn_input_data.get_shape()[2].value

    print(n_image_rnn_steps,n_image_features)
    print(n_label_rnn_steps,n_classes)

    # Define weights
    # w_image_hidden / b_image_hidden are not used by the graph below (the
    # image input projection they belonged to is disabled). They are kept on
    # purpose: every variable here is unnamed, so dropping these two would
    # presumably shift the auto-generated names of the variables created after
    # them and invalidate existing checkpoints - confirm before removing.
    w_image_hidden = tf.Variable(tf.random_normal([n_image_features, n_image_rnn_hidden]))
    b_image_hidden = tf.Variable(tf.random_normal([n_image_rnn_hidden]))
    w_label_hidden = tf.Variable(tf.random_normal([n_classes, n_label_rnn_hidden]))
    b_label_hidden = tf.Variable(tf.random_normal([n_label_rnn_hidden]))
    w_label_out = tf.Variable(tf.random_normal([n_label_rnn_hidden, n_classes]))
    b_label_out = tf.Variable(tf.random_normal([n_classes]))

    # The encoder consumes the raw image sequence; dynamic_rnn takes the
    # batch-major 3-D tensor directly, so no per-step split is needed.
    image_rnn_inputs = image_rnn_input_data

    # Image RNN (encoder)
    # NOTE: a bidirectional variant of this encoder was prototyped and is
    # disabled; the unidirectional dynamic_rnn below is the active path.
    image_lstm_cell = rnn_cell.LSTMCell(n_image_rnn_hidden)
    image_lstm_cell = rnn_cell.DropoutWrapper(image_lstm_cell, input_keep_prob=dropout_input_keep_prob, output_keep_prob=dropout_output_keep_prob)
    if n_image_rnn_cells > 1:
        image_lstm_cell = rnn_cell.MultiRNNCell([image_lstm_cell] * n_image_rnn_cells)
    image_rnn_initial_state = image_lstm_cell.zero_state(image_batch_size, tf.float32)
    image_rnn_outputs, _ = rnn.dynamic_rnn(image_lstm_cell, image_rnn_inputs, initial_state=image_rnn_initial_state, sequence_length=image_rnn_input_lengths, scope="RNN1")

    # last_relevant is defined elsewhere in this file; presumably it picks the
    # encoder output at each example's last valid step - confirm there.
    image_rnn_output = last_relevant(image_rnn_outputs,image_rnn_input_lengths)

    # Transform input data for label RNN: go time-major, project every step
    # with the shared label weights, then split into one tensor per step.
    label_rnn_inputs = tf.transpose(label_rnn_input_data, [1, 0, 2])  # (n_label_rnn_steps, n_batch_size, n_classes)
    label_rnn_inputs = tf.reshape(label_rnn_inputs, [-1, n_classes])  # (n_label_rnn_steps*n_batch_size, n_classes)
    label_rnn_inputs = tf.matmul(label_rnn_inputs, w_label_hidden) + b_label_hidden  # (n_label_rnn_steps*n_batch_size, n_label_rnn_hidden)
    label_rnn_inputs = tf.split(0, n_label_rnn_steps, label_rnn_inputs)  # n_label_rnn_steps * (n_batch_size, n_label_rnn_hidden)

    # Label RNN (decoder)
    label_lstm_cell = rnn_cell.LSTMCell(n_label_rnn_hidden, forget_bias=0)
    label_lstm_cell = rnn_cell.DropoutWrapper(label_lstm_cell, input_keep_prob=dropout_input_keep_prob,
                                              output_keep_prob=dropout_output_keep_prob)
    if n_label_rnn_cells > 1:
        label_lstm_cell = rnn_cell.MultiRNNCell([label_lstm_cell] * n_label_rnn_cells)

    # zero_state is built only to read the static width of the decoder state;
    # the state actually used is a learned tanh projection of the encoder output.
    label_state_template = label_lstm_cell.zero_state(label_batch_size, tf.float32)
    n_label_state = label_state_template.get_shape()[1].value
    w_image2label = tf.Variable(
        tf.random_normal([image_rnn_output.get_shape()[1].value, n_label_state]))
    b_image2label = tf.Variable(tf.random_normal([n_label_state]))
    label_rnn_initial_state = tf.tanh(tf.matmul(image_rnn_output, w_image2label) + b_image2label)

    label_rnn_outputs, _ = rnn.rnn(label_lstm_cell, label_rnn_inputs,
                                   initial_state=label_rnn_initial_state, scope="RNN2")

    # Map every decoder output to class scores.
    label_rnn_outputs = [tf.matmul(lro, w_label_out) + b_label_out for lro in
                         label_rnn_outputs]  # n_label_rnn_steps * (n_batch_size,n_classes)

    label_rnn_predicted_index_labels = tf.pack(label_rnn_outputs)  # (n_label_rnn_steps,n_batch_size,n_classes)
    label_rnn_predicted_index_labels = tf.transpose(label_rnn_predicted_index_labels,
                                                    [1, 0, 2])  # (n_batch_size,n_label_rnn_steps,n_classes)
    label_rnn_predicted_index_labels = tf.argmax(label_rnn_predicted_index_labels,
                                                 2)  # (n_batch_size, n_label_rnn_steps)

    return label_rnn_outputs,label_rnn_predicted_index_labels
Beispiel #15
0
	def build_model(self):
		"""Build the attention encoder/decoder graph that classifies the sense of an (arg1, arg2) pair.

		Creates the input placeholders (`sense`, `arg1`, `arg2`, `arg1_len`,
		`arg2_len`, `keep_prob`, `mask`) and sets `self.output`,
		`self.accuracy`, `self.loss`, `self.optimizer` and `self.train_op`.

		The last axis of `arg1`/`arg2` holds four id slices that are matched,
		in order, with the NER, lemma, word and POS embedding tables; a table
		whose embed size is not positive is skipped.
		"""
		with tf.variable_scope('RNNTEST'):
			self.sense = tf.placeholder(tf.int32,[None])
			self.arg1 = tf.placeholder(tf.int32,[None,None,4])
			self.arg2 = tf.placeholder(tf.int32,[None,None,4])
			self.arg1_len = tf.placeholder(tf.int32,[None])
			self.arg2_len = tf.placeholder(tf.int32,[None])
			self.keep_prob = tf.placeholder(tf.float32)

			# One [batch, len, 1] id tensor per feature type.
			arg1_list = tf.split(2,4,self.arg1)
			arg2_list = tf.split(2,4,self.arg2)
			
			# Embedding tables are pinned to the CPU.
			with tf.device('/cpu:0'):
				NER_W = tf.get_variable('NER_embed',[self.data_loader.NER_vocab_size,self.NER_embed_size]) if self.NER_embed_size>0 else None
				lemma_W = tf.get_variable('lemma_embed',[self.data_loader.lemma_vocab_size,self.lemma_embed_size]) if self.lemma_embed_size>0 else None
				if self.use_pre_trained_embedding:
					word_W = tf.get_variable('word_embed',initializer = tf.convert_to_tensor(self.data_loader.pre_trained_word_embeddings,dtype=tf.float32)) if self.word_embed_size>0 else None
				else:
					word_W = tf.get_variable('word_embed',shape = [self.data_loader.word_vocab_size,self.word_embed_size]) if self.word_embed_size>0 else None
				POS_W = tf.get_variable('POS_embed',[self.data_loader.POS_vocab_size,self.POS_embed_size]) if self.POS_embed_size>0 else None
			arg1_embed_list = []
			arg2_embed_list = []
			# The list order fixes which id slice goes with which table.
			for idx,W in enumerate([NER_W,lemma_W,word_W,POS_W]):
				if W is not None:
					arg1_embed_list.append(tf.nn.embedding_lookup(W,tf.squeeze(arg1_list[idx],[2])))
					arg2_embed_list.append(tf.nn.embedding_lookup(W,tf.squeeze(arg2_list[idx],[2])))
			arg1 = tf.nn.dropout(tf.concat(2,arg1_embed_list),self.keep_prob)
			arg2 = tf.nn.dropout(tf.concat(2,arg2_embed_list),self.keep_prob)
			
			encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
			decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

			# Bidirectional encoder over arg1, built from two dynamic_rnn calls
			# in separate variable scopes.
			with tf.variable_scope('forward_encoder'):
				forward_encoder_outputs,forward_encoder_state = rnn.dynamic_rnn(encoder_lstm_unit,arg1,self.arg1_len,dtype=tf.float32)
			with tf.variable_scope('backward_encoder'):
				# sequence_length is passed here as well (it was missing): after
				# reverse_sequence the padding sits at the end, and without the
				# lengths the backward pass would keep stepping through it,
				# polluting both the final state and the padded output positions
				# that are summed into `judge` below.
				backward_encoder_outputs,backward_encoder_state= rnn.dynamic_rnn(encoder_lstm_unit,tf.reverse_sequence(arg1,tf.cast(self.arg1_len,tf.int64),1),sequence_length=self.arg1_len,dtype=tf.float32)
			# Flip the backward outputs back so both directions are time-aligned.
			encoder_outputs = tf.concat(2,[forward_encoder_outputs,tf.reverse_sequence(backward_encoder_outputs,tf.cast(self.arg1_len,tf.int64),1)])
			encoder_state = tf.concat(1,[forward_encoder_state,backward_encoder_state])

			source = tf.expand_dims(encoder_outputs,2) #batch_size x source_len x 1 x source_depth(2*encoder_size)
			attention_W = tf.get_variable('attention_W',[1,1,2*self.encoder_size,self.attention_judge_size])
			attention_V = tf.get_variable('attention_V',[self.attention_judge_size])
			# A 1x1 convolution applies attention_W to every source position at once.
			WxH = tf.nn.conv2d(source, attention_W,[1,1,1,1],'SAME') #batch_size x source_len x 1 x attention
			# Multiplied into the unnormalised attention weights below;
			# presumably 1 for real source positions and 0 for padding - TODO confirm with the feeder.
			self.mask = tf.placeholder(tf.float32,[None,None])

			def attention(input_t,output_t_minus_1,time):
				"""Mix an attention read of the encoder outputs into the decoder input for one step."""
				with tf.variable_scope('attention'):
					VxS = tf.reshape(rnn_cell.linear(output_t_minus_1,self.attention_judge_size,True),[-1,1,1,self.attention_judge_size]) #batch_size x 1 x 1 x attention
				_exp = tf.exp(tf.reduce_sum( attention_V * tf.tanh(WxH+VxS), [3]))#batch_size x source_len x 1
				_exp = _exp*tf.expand_dims(self.mask,-1)
				# Normalise over source positions (masked softmax).
				attention_weight = _exp/tf.reduce_sum(_exp,[1], keep_dims=True)
				attention_t = tf.reduce_sum(encoder_outputs*attention_weight,[1])
				feed_in_t = tf.tanh(rnn_cell.linear([attention_t,input_t],self.embedding_size,True))
				return feed_in_t

			with tf.variable_scope('decoder'):
				decoder_outputs,decoder_state = dynamic_rnn_decoder(arg2,decoder_lstm_unit,initial_state=encoder_state,sequence_length=self.arg2_len,loop_function=attention)
			# Classifier input: length-normalised sums of decoder and encoder outputs.
			judge = tf.concat(1,[tf.reduce_sum(decoder_outputs,[1])/tf.expand_dims(tf.cast(self.arg2_len,tf.float32),-1),tf.reduce_sum(encoder_outputs,[1])/tf.expand_dims(tf.cast(self.arg1_len,tf.float32),-1)])
			unscaled_log_distribution = rnn_cell.linear(judge,self.data_loader.sense_vocab_size,True)
			self.output = tf.cast(tf.argmax(unscaled_log_distribution,1),tf.int32)
			self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output,self.sense), tf.float32))
			
			#max-margin method (disabled alternative loss)
			#self._MM = tf.placeholder(tf.int32,[None])
			#margin = tf.sub(tf.reduce_max(unscaled_log_distribution,[1]),tf.gather(tf.reshape(unscaled_log_distribution,[-1]),self._MM))
			#self.loss = tf.reduce_mean(margin)

			#maximum likelihood method
			self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(unscaled_log_distribution, self.sense))
			
			self.optimizer = tf.train.AdagradOptimizer(self.lr)
			self.train_op = self.optimizer.minimize(self.loss)