Code example #1
    def BiRNN(x, weights):

        # Prepare data shape to match the `rnn` function requirements.
        # Current data input shape: (batch_size, timesteps, n_input)
        # Required shape: a list of 'timesteps' tensors of shape (batch_size, n_input)

        # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
        x = tf.unstack(x, max_length, 1)

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn.AttentionCellWrapper(
            rnn.BasicLSTMCell(dims, forget_bias=1.0), max_length)
        # Backward direction cell
        lstm_bw_cell = rnn.AttentionCellWrapper(
            rnn.BasicLSTMCell(dims, forget_bias=1.0), max_length)

        # Get lstm cell output
        outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell,
                                                     lstm_bw_cell,
                                                     x,
                                                     dtype=tf.float32)

        print("BiLSTM lengths: ", len(outputs))
        # Linear activation, using rnn inner loop last output
        return outputs[-1]
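The function above assumes module-level `max_length` and `dims` and that `rnn` is `tf.contrib.rnn`. A minimal sketch of how it might be wired into a graph; the imports, shapes, and output projection below are assumptions for illustration, not part of the original:

import tensorflow as tf
from tensorflow.contrib import rnn  # TensorFlow 1.x

max_length, dims = 28, 128       # assumed number of timesteps and LSTM size
n_input, n_classes = 28, 10      # assumed feature and class counts

x = tf.placeholder(tf.float32, [None, max_length, n_input])
weights = {'out': tf.Variable(tf.random_normal([2 * dims, n_classes]))}

last_output = BiRNN(x, weights)                   # (batch_size, 2 * dims), fw/bw outputs concatenated
logits = tf.matmul(last_output, weights['out'])   # assumed classification head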
Code example #2
 def _build(self):
     """Get feed forward step, loss function, and optimizer for RNN"""
     # Define input layers
     self.sentences = tf.placeholder(tf.int32, [None, None])
     self.sentence_lengths = tf.placeholder(tf.int32, [None])
     self.train_marginals = tf.placeholder(tf.float32, [None])
     self.keep_prob = tf.placeholder(tf.float32)
     # Seeds
     s = self.seed
     s1, s2, s3, s4 = [None] * 4 if s is None else [s + i for i in range(4)]
     # Embedding layer
     emb_var = tf.Variable(self._embedding_init(s1))
     embedding = tf.concat([tf.zeros([1, self.dim]), emb_var], axis=0)
     inputs = tf.nn.embedding_lookup(embedding, self.sentences)
     # Build RNN graph
     batch_size = tf.shape(self.sentences)[0]
     rand_name = "RNN_{0}".format(random.randint(0, 1e12))  # Obscene hack
     init = tf.contrib.layers.xavier_initializer(seed=s2)
     with tf.variable_scope(rand_name, reuse=False, initializer=init):
         # Build RNN cells
         fw_cell = self.cell(self.dim)
         bw_cell = self.cell(self.dim)
         # Add attention if needed
         if self.attn:
             fw_cell = rnn.AttentionCellWrapper(fw_cell,
                                                self.attn,
                                                state_is_tuple=True)
             bw_cell = rnn.AttentionCellWrapper(bw_cell,
                                                self.attn,
                                                state_is_tuple=True)
         # Construct RNN
         initial_state_fw = fw_cell.zero_state(batch_size, tf.float32)
         initial_state_bw = bw_cell.zero_state(batch_size, tf.float32)
         rnn_out, _ = tf.nn.bidirectional_dynamic_rnn(
             fw_cell,
             bw_cell,
             inputs,
             sequence_length=self.sentence_lengths,
             initial_state_fw=initial_state_fw,
             initial_state_bw=initial_state_bw,
             time_major=False)
     # Get potentials
     potentials = get_bi_rnn_output(rnn_out, self.dim,
                                    self.sentence_lengths)
     # Compute activation
     potentials_dropout = tf.nn.dropout(potentials, self.keep_prob, seed=s3)
     W = tf.Variable(tf.random_normal((2 * self.dim, 1), stddev=SD,
                                      seed=s4))
     b = tf.Variable(0., dtype=tf.float32)
     h_dropout = tf.squeeze(tf.matmul(potentials_dropout, W)) + b
     # Noise-aware loss
     self.loss = tf.reduce_mean(
         tf.nn.sigmoid_cross_entropy_with_logits(
             labels=self.train_marginals, logits=h_dropout))
     # Backprop trainer
     self.train_fn = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
     # Get prediction
     self.prediction = tf.nn.sigmoid(h_dropout)
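`_build` also relies on `SD`, `get_bi_rnn_output`, and instance attributes such as `self.cell`, `self.dim`, `self.attn`, `self.lr`, and `self.seed`. Assuming those exist, a training step amounts to feeding the four placeholders; a hedged sketch (the `model` handle and the batch arrays are hypothetical):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    loss, _ = sess.run(
        [model.loss, model.train_fn],
        feed_dict={model.sentences: batch_tokens,           # int32, (batch, max_len)
                   model.sentence_lengths: batch_lengths,   # int32, (batch,)
                   model.train_marginals: batch_marginals,  # float32, (batch,)
                   model.keep_prob: 0.5})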
Code example #3
 def rnn_estimator(x, y):
     """RNN estimator with target predictor function on top."""
     x = input_op_fn(x)
     if cell_type == 'rnn':
         cell_fn = nn.rnn_cell.BasicRNNCell
     elif cell_type == 'gru':
         cell_fn = nn.rnn_cell.GRUCell
     elif cell_type == 'lstm':
         cell_fn = nn.rnn_cell.BasicLSTMCell
     else:
         raise ValueError(
             'cell_type {} is not supported. '.format(cell_type))
     # TODO: state_is_tuple=False is deprecated
     if bidirectional:
         # forward direction cell
         fw_cell = cell_fn(rnn_size)
         bw_cell = cell_fn(rnn_size)
         # attach attention cells if specified
         if attn_length is not None:
             fw_cell = contrib_rnn.AttentionCellWrapper(
                 fw_cell,
                 attn_length=attn_length,
                 attn_size=attn_size,
                 attn_vec_size=attn_vec_size,
                 state_is_tuple=False)
             bw_cell = contrib_rnn.AttentionCellWrapper(
                  bw_cell,
                 attn_length=attn_length,
                 attn_size=attn_size,
                 attn_vec_size=attn_vec_size,
                 state_is_tuple=False)
         rnn_fw_cell = nn.rnn_cell.MultiRNNCell([fw_cell] * num_layers)
         # backward direction cell
         rnn_bw_cell = nn.rnn_cell.MultiRNNCell([bw_cell] * num_layers)
         # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
         _, encoding = bidirectional_rnn(rnn_fw_cell,
                                         rnn_bw_cell,
                                         x,
                                         dtype=dtypes.float32,
                                         sequence_length=sequence_length,
                                         initial_state_fw=initial_state,
                                         initial_state_bw=initial_state)
     else:
         rnn_cell = cell_fn(rnn_size)
         if attn_length is not None:
             rnn_cell = contrib_rnn.AttentionCellWrapper(
                 rnn_cell,
                 attn_length=attn_length,
                 attn_size=attn_size,
                 attn_vec_size=attn_vec_size,
                 state_is_tuple=False)
         cell = nn.rnn_cell.MultiRNNCell([rnn_cell] * num_layers)
         _, encoding = nn.rnn(cell,
                              x,
                              dtype=dtypes.float32,
                              sequence_length=sequence_length,
                              initial_state=initial_state)
     return target_predictor_fn(encoding, y)
Code example #4
 def attn_rnn_cell():
   return contrib_rnn.AttentionCellWrapper(
       rnn_cell(),
       attn_length=attn_length,
       attn_size=attn_size,
       attn_vec_size=attn_vec_size,
       state_is_tuple=False)
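The factory relies on `rnn_cell`, `attn_length`, `attn_size`, and `attn_vec_size` from its enclosing scope. A hedged sketch of how such a factory is typically consumed, with the enclosing definitions assumed for illustration:

import tensorflow as tf
from tensorflow.contrib import rnn as contrib_rnn  # TensorFlow 1.x

attn_length, attn_size, attn_vec_size = 16, 128, 128   # assumed values
rnn_cell = lambda: contrib_rnn.GRUCell(128)            # assumed base-cell factory

# Stack several attention-wrapped cells into one multi-layer cell.
cell = contrib_rnn.MultiRNNCell([attn_rnn_cell() for _ in range(2)],
                                state_is_tuple=False)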
Code example #5
def make_rnn_cell(rnn_layer_sizes,
                  dropout_keep_prob=1.0,
                  attn_length=0,
                  base_cell=contrib_rnn.BasicLSTMCell,
                  residual_connections=False):
    cells = []
    for i in range(len(rnn_layer_sizes)):
        cell = base_cell(rnn_layer_sizes[i])
        if attn_length and not cells:
            # Add attention wrapper to first layer.
            cell = contrib_rnn.AttentionCellWrapper(cell,
                                                    attn_length,
                                                    state_is_tuple=True)
        if residual_connections:
            cell = contrib_rnn.ResidualWrapper(cell)
            if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
                cell = contrib_rnn.InputProjectionWrapper(
                    cell, rnn_layer_sizes[i])
        cell = contrib_rnn.DropoutWrapper(cell,
                                          output_keep_prob=dropout_keep_prob)
        cells.append(cell)

    cell = contrib_rnn.MultiRNNCell(cells)

    return cell
Code example #6
 def lstm_cell(lstm_unit=256):
     cell = tf.nn.rnn_cell.LSTMCell(num_units=lstm_unit)
     cell = rnn.AttentionCellWrapper(
         cell=cell,
         attn_length=self._attention_length,
         state_is_tuple=True)
     cell = tf.nn.rnn_cell.DropoutWrapper(
         cell=cell, input_keep_prob=self._keep_prob)
     return cell
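The factory references `self._attention_length` and `self._keep_prob`, so it is presumably an excerpt from a class method. A hedged standalone sketch, with extra arguments standing in for those instance attributes (values are assumptions), followed by the typical way such a factory is stacked:

import tensorflow as tf
from tensorflow.contrib import rnn  # TensorFlow 1.x

def lstm_cell(lstm_unit=256, attention_length=16, keep_prob=0.8):
    cell = tf.nn.rnn_cell.LSTMCell(num_units=lstm_unit)
    cell = rnn.AttentionCellWrapper(cell=cell,
                                    attn_length=attention_length,
                                    state_is_tuple=True)
    cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, input_keep_prob=keep_prob)
    return cell

stacked = tf.nn.rnn_cell.MultiRNNCell([lstm_cell() for _ in range(2)])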
Code example #7
File: my_layers.py  Project: sd12037/tensorflow
def bidirectional_LSTM(x,
                       n_hidden,
                       return_seq=False,
                       attention=0,
                       cell='LSTM'):
    # `static_bidirectional_rnn` expects a list of (batch_size, dim) tensors;
    # if x arrives as a (batch_size, seq_len, dim) tensor, unstack it along the time axis first:
    #x = tf.unstack(x, axis=1)

    if cell == 'GRU':
        cell_forward = rnn.GRUCell(n_hidden)
        cell_backward = rnn.GRUCell(n_hidden)

    if cell == 'LSTM':
        cell_forward = rnn.LSTMCell(n_hidden)
        cell_backward = rnn.LSTMCell(n_hidden)

    if cell == 'TF-LSTM':
        cell_forward = rnn.TimeFreqLSTMCell(num_units=n_hidden,
                                            feature_size=3,
                                            frequency_skip=1)
        cell_backward = rnn.TimeFreqLSTMCell(num_units=n_hidden,
                                             feature_size=3,
                                             frequency_skip=1)

    if cell == 'Grid-LSTM':
        cell_forward = rnn.GridLSTMCell(n_hidden, num_frequency_blocks=[5])
        cell_backward = rnn.GridLSTMCell(n_hidden, num_frequency_blocks=[5])

    if attention:
        cell_forward = rnn.AttentionCellWrapper(cell_forward,
                                                attn_length=attention)
        cell_backward = rnn.AttentionCellWrapper(cell_backward,
                                                 attn_length=attention)

    h, _, _ = \
        rnn.static_bidirectional_rnn(cell_forward, cell_backward, x,
                                     dtype=tf.float32)
    if return_seq:
        return h
    else:
        return h[-1]
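A hedged usage sketch: `static_bidirectional_rnn` consumes a length-T list of (batch_size, dim) tensors, so a 3-D batch is unstacked along the time axis before the call (the placeholder shape and hyperparameters are assumptions):

x_3d = tf.placeholder(tf.float32, [None, 30, 64])                # (batch, seq_len, dim), assumed
x_list = tf.unstack(x_3d, axis=1)
last_h = bidirectional_LSTM(x_list, n_hidden=128, attention=10)  # (batch, 2 * n_hidden)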
Code example #8
def inference(x,
              n_in=None,
              n_time=None,
              n_hidden=None,
              n_out=None,
              keep_prob=None):
    def weight_variable(shape, name='W'):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial, name=name)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_in])
    x = tf.split(x, n_time, 0)

    with tf.name_scope('RNN'):
        cell_forward = rnn.GRUCell(n_hidden)
        cell_forward = rnn.AttentionCellWrapper(cell_forward, attn_length=14)
        cell_backward = rnn.GRUCell(n_hidden)
        cell_backward = rnn.AttentionCellWrapper(cell_backward, attn_length=14)

        h, _, _ = \
            rnn.static_bidirectional_rnn(cell_forward, cell_backward, x,
                                         dtype=tf.float32)
        h = h[-1]
        h = tf.nn.dropout(h, keep_prob)
    with tf.name_scope('fc_NN'):
        W = weight_variable([n_hidden * 2, n_hidden], name='W')
        b = bias_variable([n_hidden])
        h = tf.nn.elu(tf.layers.batch_normalization(tf.matmul(h, W) + b))

        h = tf.nn.dropout(h, keep_prob)

        Wo = weight_variable([n_hidden, n_out], name='Wo')
        bo = bias_variable([n_out])
        y = tf.nn.softmax(tf.layers.batch_normalization(tf.matmul(h, Wo) + bo))

    W_list = [W, Wo]
    return y, W_list
Code example #9
File: medical_rnn.py  Project: bristy1588/6867Project
def RNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    #lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_cell = rnn.AttentionCellWrapper(
                cell=rnn.BasicLSTMCell(num_hidden, forget_bias=1.0),
                attn_length=10)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Code example #10
def make_rnn_cell(rnn_layer_sizes,
                  dropout_keep_prob=1.0,
                  attn_length=0,
                  base_cell=contrib_rnn.BasicLSTMCell,
                  residual_connections=False):
    """Makes a RNN cell from the given hyperparameters.

  Args:
    rnn_layer_sizes: A list of integer sizes (in units) for each layer of the
        RNN.
    dropout_keep_prob: The float probability to keep the output of any given
        sub-cell.
    attn_length: The size of the attention vector.
    base_cell: The base tf.contrib.rnn.RNNCell to use for sub-cells.
    residual_connections: Whether or not to use residual connections (via
        tf.contrib.rnn.ResidualWrapper).

  Returns:
      A tf.contrib.rnn.MultiRNNCell based on the given hyperparameters.
  """
    cells = []
    for i in range(len(rnn_layer_sizes)):
        cell = base_cell(rnn_layer_sizes[i])
        if attn_length and not cells:
            # Add attention wrapper to first layer.
            cell = contrib_rnn.AttentionCellWrapper(cell,
                                                    attn_length,
                                                    state_is_tuple=True)
        if residual_connections:
            cell = contrib_rnn.ResidualWrapper(cell)
            if i == 0 or rnn_layer_sizes[i] != rnn_layer_sizes[i - 1]:
                cell = contrib_rnn.InputProjectionWrapper(
                    cell, rnn_layer_sizes[i])
        cell = contrib_rnn.DropoutWrapper(cell,
                                          output_keep_prob=dropout_keep_prob)
        cells.append(cell)

    cell = contrib_rnn.MultiRNNCell(cells)

    return cell
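Given the docstring above, a minimal usage sketch; the layer sizes, placeholder shapes, and the call to `tf.nn.dynamic_rnn` are illustrative assumptions, not taken from the original project:

import tensorflow as tf
from tensorflow.contrib import rnn as contrib_rnn  # TensorFlow 1.x

cell = make_rnn_cell(rnn_layer_sizes=[128, 128],
                     dropout_keep_prob=0.75,
                     attn_length=40)

inputs = tf.placeholder(tf.float32, [None, None, 38])   # (batch, time, features), assumed
lengths = tf.placeholder(tf.int32, [None])
initial_state = cell.zero_state(tf.shape(inputs)[0], tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs,
                                         sequence_length=lengths,
                                         initial_state=initial_state)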
Code example #11
def infer(x,
          y,
          batch_size,
          is_training,
          num_input_digits=None,
          num_output_digits=None,
          num_hidden=None,
          num_out=None):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    # Encoder.
    encoder = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    encoder = rnn.AttentionCellWrapper(encoder,
                                       num_input_digits,
                                       state_is_tuple=True)
    state = encoder.zero_state(batch_size, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for t in range(num_input_digits):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            # x = (samples, time-steps, features).
            (output, state) = encoder(x[:, t, :], state)
            encoder_outputs.append(output)
            encoder_states.append(state)

    # Decoder.
    decoder = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    decoder = rnn.AttentionCellWrapper(decoder,
                                       num_input_digits,
                                       state_is_tuple=True)
    state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # Pre-define weight and bias of output layer.
    V = weight_variable([num_hidden, num_out])
    c = bias_variable([num_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for t in range(1, num_output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                # y = (samples, time-steps, features).
                (output, state) = decoder(y[:, t - 1, :], state)
            else:
                # Use the previous output as an input.
                linear = tf.matmul(decoder_outputs[-1], V) + c
                out = tf.nn.softmax(linear)
                outputs.append(out)
                # One-hot over the output vocabulary so the fed-back input
                # matches the width of y[:, t - 1, :].
                out = tf.one_hot(tf.argmax(out, -1), depth=num_out)
                (output, state) = decoder(out, state)

            decoder_outputs.append(output)

    if is_training is True:
        output = tf.reshape(tf.concat(decoder_outputs, axis=1),
                            [-1, num_output_digits, num_hidden])

        linear = tf.einsum('ijk,kl->ijl', output, V) + c
        #linear = tf.matmul(output, V) + c
        return tf.nn.softmax(linear)
    else:
        # Compute the final output.
        linear = tf.matmul(decoder_outputs[-1], V) + c
        out = tf.nn.softmax(linear)
        outputs.append(out)

        output = tf.reshape(tf.concat(outputs, axis=1),
                            [-1, num_output_digits, num_out])
        return output
Code example #12
graph = tf.Graph()
with graph.as_default():

    #------------------------------------construct LSTM------------------------------------------#
    # placeholders
    X_p = tf.placeholder(dtype=tf.float32,
                         shape=(None, TIME_STEPS, 28),
                         name="input_placeholder")
    y_p = tf.placeholder(dtype=tf.float32,
                         shape=(None, 10),
                         name="pred_placeholder")

    #lstm instance
    lstm_forward_1 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS1)
    # add attention (the attention here differs slightly from the attention in an encoder-decoder architecture)
    lstm_forward_1 = rnn.AttentionCellWrapper(cell=lstm_forward_1,
                                              attn_length=5)

    lstm_forward_2 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    # add attention
    lstm_forward_2 = rnn.AttentionCellWrapper(cell=lstm_forward_2,
                                              attn_length=5)
    lstm_forward = rnn.MultiRNNCell(cells=[lstm_forward_1, lstm_forward_2])

    lstm_backward_1 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS1)
    # add attention
    lstm_backward_1 = rnn.AttentionCellWrapper(cell=lstm_backward_1,
                                               attn_length=5)

    lstm_backward_2 = rnn.BasicLSTMCell(num_units=HIDDEN_UNITS)
    lstm_backward_2 = rnn.AttentionCellWrapper(cell=lstm_backward_2,
                                               attn_length=5)
Code example #13
def inference(x,
              y,
              n_batch,
              is_training,
              input_digits=None,
              output_digits=None,
              n_hidden=None,
              n_out=None):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial)

    # Encode
    encoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    encoder = rnn.AttentionCellWrapper(encoder,
                                       input_digits,
                                       state_is_tuple=True)
    state = encoder.zero_state(n_batch, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for t in range(input_digits):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (output, state) = encoder(x[:, t, :], state)
            encoder_outputs.append(output)
            encoder_states.append(state)

    # Decode
    decoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    decoder = rnn.AttentionCellWrapper(decoder,
                                       input_digits,
                                       state_is_tuple=True)
    state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # Pre-define the weights and bias of the output layer
    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for t in range(1, output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                (output, state) = decoder(y[:, t - 1, :], state)
            else:
                # Get the previous output
                linear = tf.matmul(decoder_outputs[-1], V) + c
                out = tf.nn.softmax(linear)
                outputs.append(out)
                # One-hot over the output vocabulary so the fed-back input
                # matches the width of y[:, t - 1, :].
                out = tf.one_hot(tf.argmax(out, -1), depth=n_out)

                (output, state) = decoder(out, state)

            decoder_outputs.append(output)

    if is_training is True:
        output = tf.reshape(tf.concat(decoder_outputs, axis=1),
                            [-1, output_digits, n_hidden])

        linear = tf.einsum('ijk,kl->ijl', output, V) + c
        return tf.nn.softmax(linear)
    else:
        # Compute the final output
        linear = tf.matmul(decoder_outputs[-1], V) + c
        out = tf.nn.softmax(linear)
        outputs.append(out)

        output = tf.reshape(tf.concat(outputs, axis=1),
                            [-1, output_digits, n_out])
        return output
Code example #14
    def hierarchy(self, inputs, y_masked, seq_length, scope_name, reuse=False):
        if scope_name == "pw":
            encoder_scope_name = "en_lstm_pw"
            decoder_scope_name = "de_lstm_pw"
        elif scope_name == "pph":
            encoder_scope_name = "en_lstm_pph"
            decoder_scope_name = "de_lstm_pph"
        else:
            encoder_scope_name = "en_lstm_iph"
            decoder_scope_name = "de_lstm_iph"

        with tf.variable_scope(name_or_scope=scope_name, reuse=reuse):
            #forward part
            lstm_forward1 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # add attention (the attention here differs slightly from the attention in an encoder-decoder architecture)
            lstm_forward1 = rnn.AttentionCellWrapper(cell=lstm_forward1,
                                                     attn_length=5)

            lstm_forward2 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # add attention
            lstm_forward2 = rnn.AttentionCellWrapper(cell=lstm_forward2,
                                                     attn_length=5)

            lstm_forward = rnn.MultiRNNCell(
                cells=[lstm_forward1, lstm_forward2])
            # dropout
            lstm_forward = rnn.DropoutWrapper(
                cell=lstm_forward,
                input_keep_prob=self.input_keep_prob_p,
                output_keep_prob=self.output_keep_prob_p)

            #backward part
            lstm_backward1 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # add attention
            lstm_backward1 = rnn.AttentionCellWrapper(cell=lstm_backward1,
                                                      attn_length=5)

            lstm_backward2 = rnn.BasicLSTMCell(num_units=self.hidden_units_num)
            # add attention
            lstm_backward2 = rnn.AttentionCellWrapper(cell=lstm_backward2,
                                                      attn_length=5)

            lstm_backward = rnn.MultiRNNCell(
                cells=[lstm_backward1, lstm_backward2])
            # dropout
            lstm_backward = rnn.DropoutWrapper(
                cell=lstm_backward,
                input_keep_prob=self.input_keep_prob_p,
                output_keep_prob=self.output_keep_prob_p)

            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_forward,
                cell_bw=lstm_backward,
                inputs=inputs,
                sequence_length=seq_length,
                dtype=tf.float32,
                scope=decoder_scope_name)
            outputs_forward = outputs[
                0]  # shape of h is [batch_size, max_time, cell_fw.output_size]
            outputs_backward = outputs[
                1]  # shape of h is [batch_size, max_time, cell_bw.output_size]
            # concat final outputs [batch_size, max_time, cell_fw.output_size*2]
            final_outputs = tf.concat(
                values=[outputs_forward, outputs_backward], axis=2)
            #shape of h: [batch * time_steps, hidden_units * 2]
            h = tf.reshape(tensor=final_outputs,
                           shape=(-1, self.hidden_units_num * 2))

            # dropout before the fully connected layer
            h = tf.nn.dropout(x=h, keep_prob=self.keep_prob_p)

            # fully connect layer(projection)
            weight = tf.get_variable(
                name="Weight",
                shape=(self.hidden_units_num * 2, self.class_num),
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            bias = tf.get_variable(
                name="Bias",
                shape=(self.class_num, ),
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            # logits:[batch_size*max_time, 2]
            #logits =tf.nn.elu(features=tf.matmul(h, weight) + bias)
            logits = tf.matmul(h, weight) + bias

            # logits in the normal layout: [batch_size, max_time_steps, 2]
            logits_normal = tf.reshape(tensor=logits,
                                       shape=(-1, self.max_sentence_size,
                                              self.class_num),
                                       name="logits_normal")
            # logits_pw_masked [seq_len1+seq_len2+..+seq_lenn, 2]
            logits_masked = tf.boolean_mask(tensor=logits_normal,
                                            mask=self.mask,
                                            name="logits_masked")
            #print("logits_masked.shape", logits_masked.shape)

            # softmax
            prob_masked = tf.nn.softmax(logits=logits_masked,
                                        axis=-1,
                                        name="prob_pw_masked")
            #print("prob_masked.shape", prob_masked.shape)

            # prediction
            # pred:[batch_size*max_time,]
            pred = tf.cast(tf.argmax(logits, 1), tf.int32, name="pred")
            # pred in the normal layout: [batch_size, max_time]
            pred_normal = tf.reshape(tensor=pred,
                                     shape=(-1, self.max_sentence_size),
                                     name="pred_normal")
            # one-hot the pred_normal:[batch_size, max_time,class_num]
            pred_normal_one_hot = tf.one_hot(indices=pred_normal,
                                             depth=self.class_num,
                                             name="pred_normal_one_hot")
            # pred_masked [seq_len1+seq_len2+....+,]
            pred_masked = tf.boolean_mask(tensor=pred_normal,
                                          mask=self.mask,
                                          name="pred_masked")

            # loss
            loss = tf.losses.sparse_softmax_cross_entropy(
                labels=y_masked,
                logits=logits_masked) + tf.contrib.layers.l2_regularizer(
                    self.lambda_pw)(weight)

            return loss, prob_masked, pred, pred_masked, pred_normal_one_hot
Code example #15
    #------------------------------------construct LSTM------------------------------------------#
    # placeholders
    X_p = tf.placeholder(dtype=tf.float32,
                         shape=(None, TIME_STEPS, 28),
                         name="input_placeholder")
    y_p = tf.placeholder(dtype=tf.float32,
                         shape=(None, 10),
                         name="pred_placeholder")

    #gru instance
    gru_forward_1 = tf.nn.rnn_cell.GRUCell(
        num_units=HIDDEN_UNITS1,
        kernel_initializer=initializers.xavier_initializer(),
        bias_initializer=tf.initializers.random_normal())

    gru_forward_1 = rnn.AttentionCellWrapper(cell=gru_forward_1, attn_length=5)

    gru_forward_2 = tf.nn.rnn_cell.GRUCell(
        num_units=HIDDEN_UNITS,
        kernel_initializer=initializers.xavier_initializer(),
        bias_initializer=tf.initializers.random_normal())
    gru_forward_2 = rnn.AttentionCellWrapper(cell=gru_forward_2, attn_length=5)
    gru_forward = rnn.MultiRNNCell(cells=[gru_forward_1, gru_forward_2])

    gru_backward_1 = tf.nn.rnn_cell.GRUCell(
        num_units=HIDDEN_UNITS1,
        kernel_initializer=initializers.xavier_initializer(),
        bias_initializer=tf.initializers.random_normal())

    gru_backward_1 = rnn.AttentionCellWrapper(cell=gru_backward_1,
                                              attn_length=5)
Code example #16
    def __init__(self, sequence_length, embedding_size, previous_component,
                 num_layers, bidirectional, attn_length, attn_size,
                 attn_vec_size):
        """
        Args:
          num_layers: The number of layers of the rnn model.
          bidirectional: boolean, Whether this is a bidirectional rnn.
          sequence_length: If sequence_length is provided, dynamic calculation is
            performed. This saves computational time when unrolling past max sequence
            length. Required for bidirectional RNNs.
          initial_state: An initial state for the RNN. This must be a tensor of
            appropriate type and shape [batch_size x cell.state_size].
          attn_length: integer, the size of attention vector attached to rnn cells.
          attn_size: integer, the size of an attention window attached to rnn cells.
          attn_vec_size: integer, the number of convolutional features calculated on
            attention state and the size of the hidden layer built from base cell
            state.

        """
        x = previous_component.embedded_expanded
        n_nodes = embedding_size

        if bidirectional:
            # forward direction cell
            fw_cell = rnn.GRUCell(n_nodes)
            bw_cell = rnn.GRUCell(n_nodes)
            # attach attention cells if specified
            if attn_length is not None:
                fw_cell = rnn.AttentionCellWrapper(fw_cell,
                                                   attn_length=attn_length,
                                                   attn_size=attn_size,
                                                   attn_vec_size=attn_vec_size,
                                                   state_is_tuple=False)
                bw_cell = rnn.AttentionCellWrapper(bw_cell,
                                                   attn_length=attn_length,
                                                   attn_size=attn_size,
                                                   attn_vec_size=attn_vec_size,
                                                   state_is_tuple=False)
            rnn_fw_cell = rnn.MultiRNNCell([fw_cell] * num_layers,
                                           state_is_tuple=False)
            # backward direction cell
            rnn_bw_cell = rnn.MultiRNNCell([bw_cell] * num_layers,
                                           state_is_tuple=False)
            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=[rnn_fw_cell],  # stack_bidirectional_dynamic_rnn expects lists of cells
                cells_bw=[rnn_bw_cell],
                inputs=x,
                dtype=tf.dtypes.float32,
                sequence_length=sequence_length)
            self.last_layer = outputs

        else:
            rnn_cell = rnn.GRUCell(n_nodes)
            if attn_length is not None:
                rnn_cell = rnn.AttentionCellWrapper(
                    rnn_cell,
                    attn_length=attn_length,
                    attn_size=attn_size,
                    attn_vec_size=attn_vec_size,
                    state_is_tuple=False)
            cell = rnn.MultiRNNCell([rnn_cell] * num_layers,
                                    state_is_tuple=False)
            outputs, state = rnn.static_rnn(cell,
                                            x,
                                            dtype=tf.dtypes.float32,
                                            sequence_length=sequence_length)
            self.last_layer = outputs