Code example #1
File: custom_cell.py Project: hansonboy/SER
    def __call__(self, inputs, state, scope=None):
        with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
            with tf.variable_scope("Gates"):  # Reset gate and update gate.
                # We start with bias of 1.0 to not reset and not update.
                ru = rnn_cell._linear([inputs, state], 2 * self._num_units,
                                      True, 1.0)
                ru = tf.nn.sigmoid(ru)
                r, u = tf.split(1, 2, ru)
            with tf.variable_scope("Candidate"):
                lambdas = rnn_cell._linear([inputs, state], self._num_weights,
                                           True)
                lambdas = tf.split(1, self._num_weights,
                                   tf.nn.softmax(lambdas))

                Ws = tf.get_variable("Ws",
                                     shape=[
                                         self._num_weights,
                                         inputs.get_shape()[1], self._num_units
                                     ])
                Ws = [
                    tf.squeeze(i) for i in tf.split(0, self._num_weights, Ws)
                ]

                candidate_inputs = []

                for idx, W in enumerate(Ws):
                    candidate_inputs.append(
                        tf.matmul(inputs, W) * lambdas[idx])

                Wx = tf.add_n(candidate_inputs)

                c = tf.nn.tanh(Wx + rnn_cell._linear(
                    [r * state], self._num_units, True, scope="second"))
            new_h = u * state + (1 - u) * c
        return new_h, new_h
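
The candidate above mixes `self._num_weights` separate input projections with a softmax gate (`lambdas`). A minimal NumPy sketch of that mixture, with hypothetical shapes, illustrates what the `add_n` loop computes:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

batch, input_dim, num_units, num_weights = 4, 8, 16, 3
inputs = np.random.randn(batch, input_dim)
Ws = np.random.randn(num_weights, input_dim, num_units)   # one matrix per mixture weight
lambdas = softmax(np.random.randn(batch, num_weights))    # stand-in for the _linear output

# Softmax-weighted sum of per-matrix projections, as in the add_n loop.
Wx = sum(inputs @ Ws[k] * lambdas[:, k:k + 1] for k in range(num_weights))
assert Wx.shape == (batch, num_units)
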
Code example #2
def linear(args,
           output_size,
           bias,
           bias_start=0.0,
           scope=None,
           squeeze=False,
           keep_prob=None,
           is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("args must be specified")
    if not nest.is_sequence(args):
        args = [args]
    flat_args = [flatten(arg, 1) for arg in args]
    if keep_prob is not None and is_train is not None:
        flat_args = [
            tf.cond(is_train, lambda: tf.nn.dropout(arg, keep_prob),
                    lambda: arg) for arg in flat_args
        ]
    with tf.variable_scope(scope or 'linear'):
        flat_out = _linear(
            flat_args,
            output_size,
            bias,
            bias_initializer=tf.constant_initializer(bias_start))
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    return out
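
`flatten` and `reconstruct` are project helpers not shown here; judging from their use, they collapse all leading axes before `_linear` and restore them afterwards. A NumPy sketch of that assumed behavior:

import numpy as np

def flatten(x, keep):
    # Collapse all leading axes, keeping the trailing `keep` axes.
    return x.reshape(-1, *x.shape[-keep:])

def reconstruct(flat, ref, keep):
    # Restore the leading axes of `ref` around `flat`'s trailing axes.
    return flat.reshape(*ref.shape[:-keep], *flat.shape[1:])

x = np.random.randn(2, 5, 8)                  # e.g. [batch, time, features]
out = flatten(x, 1) @ np.random.randn(8, 3)   # the _linear projection on [10, 8]
print(reconstruct(out, x, 1).shape)           # (2, 5, 3)
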
Code example #3
File: nn.py Project: yuweitu/DS1011_Final_Projet
def linear(args,
           output_size,
           bias,
           bias_start=0.0,
           scope=None,
           squeeze=False,
           wd=0.0,
           input_keep_prob=1.0,
           is_train=None):
    with tf.variable_scope(scope or "linear"):
        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]

        flat_args = [flatten(arg, 1) for arg in args]
        assert is_train is not None
        flat_args = [
            tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob),
                    lambda: arg) for arg in flat_args
        ]
        flat_out = _linear(flat_args, output_size, bias)
        out = reconstruct(flat_out, args[0], 1)
        if squeeze:
            out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
        if wd:
            add_wd(wd)

    return out
Code example #4
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).

    Args:
      inputs: (batch,n) tensor
      state: the states and hidden unit of the two cells

    Returns:
      new_state, new_inputs
    """
        with vs.variable_scope(scope or type(self).__name__):
            c1, c2, h1, h2 = state

            # change bias argument to False since LN will add bias via shift
            concat = _linear([inputs, h1, h2], 5 * self._num_units, False)

            i, j, f1, f2, o = array_ops.split(concat, 5, 1)

            # add layer normalization to each gate
            i = ln(i, scope='i/')
            j = ln(j, scope='j/')
            f1 = ln(f1, scope='f1/')
            f2 = ln(f2, scope='f2/')
            o = ln(o, scope='o/')

            new_c = (c1 * nn.sigmoid(f1 + self._forget_bias) +
                     c2 * nn.sigmoid(f2 + self._forget_bias) +
                     nn.sigmoid(i) * self._activation(j))

            # add layer_normalization in calculation of new hidden state
            new_h = self._activation(ln(new_c, scope='new_h/')) * nn.sigmoid(o)
            new_state = rnn.LSTMStateTuple(new_c, new_h)

            return new_h, new_state
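
`ln` is defined elsewhere in the project. Under the usual definition of layer normalization (normalize each example over its feature axis, then scale and shift), a NumPy sketch would be:

import numpy as np

def ln(x, gain=1.0, shift=0.0, eps=1e-5):
    # Per-row mean/variance over the feature axis, then an affine rescale.
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return gain * (x - mean) / np.sqrt(var + eps) + shift

x = np.random.randn(4, 16)
print(ln(x).mean(axis=-1))  # approximately zero per row
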
Code example #5
 def attention(query, use_attention=False):
     """Put attention masks on hidden using hidden_features and query."""
     attn_weights = []
     ds = []  # Results of attention reads will be stored here.
     for i in xrange(num_heads):
         with variable_scope.variable_scope("Attention_%d" % i):
             y = rnn_cell._linear(query, attention_vec_size, True)
             y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
             # Attention mask is a softmax of v^T * tanh(...).
             s = math_ops.reduce_sum(
                 v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
             if not use_attention:  # apply mean pooling
                 weights = tf.tile(sequence_length,
                                   tf.pack([attn_length]))
                 weights = array_ops.reshape(weights, tf.shape(s))
                 a = array_ops.ones(
                     tf.shape(s),
                     dtype=dtype) / math_ops.to_float(weights)
                 # a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(tf.shape(s)[1])
             else:
                 a = nn_ops.softmax(s)
             attn_weights.append(a)
             # Now calculate the attention-weighted vector d.
             d = math_ops.reduce_sum(
                 array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                 [1, 2])
             ds.append(array_ops.reshape(d, [-1, attn_size]))
     return attn_weights, ds
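
The score in these attention snippets is the additive (Bahdanau-style) form s = v^T tanh(W1 h + W2 q). A NumPy sketch with hypothetical shapes shows what the reduce_sum lines compute:

import numpy as np

batch, attn_length, attn_size = 2, 7, 32
hidden = np.random.randn(batch, attn_length, attn_size)           # encoder states
hidden_features = hidden @ np.random.randn(attn_size, attn_size)  # W1 * h (the 1x1 conv)
y = np.random.randn(batch, 1, attn_size)                          # W2 * query, broadcast over time
v = np.random.randn(attn_size)

s = np.tanh(hidden_features + y) @ v               # scores, [batch, attn_length]
a = np.exp(s) / np.exp(s).sum(1, keepdims=True)    # softmax over time steps
d = (a[..., None] * hidden).sum(1)                 # attention read, [batch, attn_size]
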
Code example #6
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with tf.variable_scope(scope or type(self).__name__):
            c, h = state

            # change bias argument to False since LN will add bias via shift
            concat = rnn_cell._linear([inputs, h], 4 * self._num_units, False)

            i, j, f, o = tf.split(1, 4, concat)

            # add layer normalization to each gate
            i = ln(i, scope='i/')
            j = ln(j, scope='j/')
            f = ln(f, scope='f/')
            o = ln(o, scope='o/')

            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))

            # add layer_normalization in calculation of new hidden state
            new_h = self._activation(ln(new_c,
                                        scope='new_h/')) * tf.nn.sigmoid(o)
            new_state = LSTMStateTuple(new_c, new_h)

            return new_h, new_state
Code example #7
File: cell.py Project: gokulsg/GRNN-SR
 def __call__(self, inputs, state, scope=None):
     with tf.variable_scope(scope or "grnnsp_cell"):
         c, h = state
         with tf.variable_scope("gates"):
             u_c, u_h, r_c, r_w = array_ops.split(
                 split_dim=1,
                 num_split=4,
                 value=tf.sigmoid(
                     _linear([inputs, c], 4 * self._num_units, True, 1.0)))
         with tf.variable_scope("inputs"):
             j_c = tf.tanh(
                 _linear([inputs, r_c * c],
                         self._num_units,
                         True,
                         scope="input_c"))
             j_h = tf.tanh(
                 _linear(inputs, self._num_units, True, scope="input_h"))
         new_c = u_c * c + (1 - u_c) * j_c
         new_h = u_h * h + (1 - u_h) * j_h
         new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
         return new_h, new_state
Code example #8
  def __call__(self, inputs, state, scope=None):
    step_t, state = state
    with vs.variable_scope(self._scope or type(self).__name__):  # "GRUCell"
      with tf.variable_scope("Gates_X"):
        rx, ux = tf.split(rnn_cell._linear([inputs], 2 * self._num_units, False),
                          num_or_size_splits=2, axis=1)
        rh, uh = tf.split(tf.matmul(state, self._Wgh) + self._Bgh,
                          num_or_size_splits=2, axis=1)

        r, u = rx + rh, ux + uh
        r, u = sigmoid(r), sigmoid(u)
      with vs.variable_scope("Candidate"):
        cx = rnn_cell._linear([inputs], self._num_units, False)
        c = cx + tf.matmul(state * r, self._Wch) + self._Bch
        c = self._activation(c)
      new_h = u * state + (1 - u) * c
    # Only commit a new state every `period` steps; cast the boolean mask for arithmetic.
    active = tf.cast(tf.equal(step_t % self._period, 0), new_h.dtype)
    new_h = active * new_h + (1 - active) * state
    return new_h, [new_h]
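
The last two lines implement a clockwork-style schedule: the cell only commits a new hidden state every `self._period` steps and otherwise carries the old state through. A NumPy sketch of that masking, assuming `step_t` is an integer step counter:

import numpy as np

period, num_units = 4, 8
state = np.zeros((1, num_units))
for step_t in range(10):
    candidate = np.random.randn(1, num_units)   # stands in for the GRU update above
    active = float(step_t % period == 0)        # 1.0 on active steps, else 0.0
    state = active * candidate + (1 - active) * state
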
Code example #9
File: seq_labeling.py Project: sxdkxgwan/TF_seq2seq
 def attention(query):
   """Put attention masks on hidden using hidden_features and query."""
   attn_weights = []
   ds = []  # Results of attention reads will be stored here.
   for i in xrange(num_heads):
     with variable_scope.variable_scope("Attention_%d" % i):
       y = rnn_cell._linear(query, attention_vec_size, True)
       y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
       # Attention mask is a softmax of v^T * tanh(...).
       s = math_ops.reduce_sum(
           v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
       a = nn_ops.softmax(s)
       attn_weights.append(a)
       # Now calculate the attention-weighted vector d.
       d = math_ops.reduce_sum(
           array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
           [1, 2])
       ds.append(array_ops.reshape(d, [-1, attn_size]))
   return attn_weights, ds
Code example #10
        def attention(query):
            """Compute the attention read for the current decoder state."""
            # Flatten the query if it is a tuple (e.g. an LSTM state tuple).
            if nest.is_sequence(query):
                query = tf.concat(nest.flatten(query), 1)

            with tf.variable_scope("Attention"):
                y = _linear(args=query, output_size=attn_size, bias=True)
                y = tf.reshape(y, [-1, 1, 1, attn_size])

                s = tf.reduce_sum(
                    attention_softmax_weights *
                    tf.nn.tanh(hidden_features + y), [2, 3])
                a = tf.nn.softmax(s)

                c = tf.reduce_sum(
                    tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
                cs = tf.reshape(c, [-1, attn_size])
            return cs, a
Code example #11
    def __call__(self, inputs, state, scope=None):
        """Conditional long short-term memory cell (CLSTM)."""
        with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = array_ops.split(1, 2, state)
            concat = _linear([inputs, h], 4 * self._num_units, True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(1, 4, concat)

            new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                     self._activation(j))
            new_h = self._activation(new_c) * sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat(1, [new_c, new_h])
            return new_h, new_state
Code example #12
File: bilstm_seq2seq.py Project: yzx1992/tri
 def attention(query):
     """Put attention masks on hidden using hidden_features and query."""
     weights = []
     ds = []  # Results of attention reads will be stored here.
     for i in xrange(num_heads):
         with tf.variable_scope("Attention_%d" % i):
             y = rnn_cell._linear(query, attention_vec_size, True)
             y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
             # Attention mask is a softmax of v^T * tanh(...).
             s = tf.reduce_sum(v[i] * tf.tanh(hidden_features[i] + y), [2, 3])
             a = tf.nn.softmax(s)
             weights.append(a)
             # Now calculate the attention-weighted vector d.
             d = tf.reduce_sum(tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
             ds.append(tf.reshape(d, [-1, attn_size]))
     return weights, ds
Code example #13
File: cell.py Project: Bolin0215/tf-sekelab
        def att_weights(inputs, state, memory):
            '''
            :param inputs: [N, i]
            :param state: [N, d]
            :param memory: [N, J, i]
            :return: [N, J]
            '''
            rank = len(memory.get_shape())
            memory_size = tf.shape(memory)[rank-2]
            tiled_inputs = tf.tile(tf.expand_dims(inputs, 1), [1, memory_size, 1])
            if isinstance(state, tuple):
                tiled_state = [tf.tile(tf.expand_dims(each, 1), [1, memory_size, 1]) for each in state]
            else:
                tiled_state = [tf.tile(tf.expand_dims(state, 1), [1, memory_size, 1])]

            in_ = tf.concat([tiled_inputs] + tiled_state + [memory], 2)
            flat_in = flatten(in_, 1)
            flat_in = [tf.nn.dropout(flat_in, input_keep_prob)]
            flat_out = _linear(flat_in, 1, bias)

            out = reconstruct(flat_out, in_, 1)
            out = tf.squeeze(out, [len(in_.get_shape().as_list())-1])
            return out
Code example #14
    def __call__(self, inputs, state, d_act, scope=None):
        """Long short-term memory cell (LSTM)."""
        with vs.variable_scope(scope
                               or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                try:
                    # TF <= 0.12 signature: split(axis, num, value)
                    c, h = array_ops.split(1, 2, state)
                except Exception:
                    # TF >= 1.0 signature: split(value, num, axis)
                    c, h = array_ops.split(state, 2, 1)
            concat = _linear([inputs, h], 4 * self._num_units, True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            try:
                # TF <= 0.12 signature: split(axis, num, value)
                i, j, f, o = array_ops.split(1, 4, concat)
            except Exception:
                # TF >= 1.0 signature: split(value, num, axis)
                i, j, f, o = array_ops.split(concat, 4, 1)

            w_d = vs.get_variable('w_d',
                                  [self.key_words_voc_size, self._num_units])

            new_c = (c * sigmoid(f + self._forget_bias) +
                     sigmoid(i) * self._activation(j)) + tf.tanh(
                         tf.matmul(d_act, w_d))
            new_h = self._activation(new_c) * sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                try:
                    # TF <= 0.12 signature: concat(axis, values)
                    new_state = array_ops.concat(1, [new_c, new_h])
                except Exception:
                    # TF >= 1.0 signature: concat(values, axis)
                    new_state = array_ops.concat([new_c, new_h], 1)
            return new_h, new_state
Code example #15
File: seq_labeling.py Project: sxdkxgwan/TF_seq2seq
def attention_RNN(encoder_outputs, 
                  encoder_state,
                  num_decoder_symbols,
                  sequence_length,
                  num_heads=1,
                  dtype=dtypes.float32,
                  use_attention=True,
                  loop_function=None,
                  scope=None):
  if use_attention:
    print ('Use the attention RNN model')
    if num_heads < 1:
      raise ValueError("With less than 1 heads, use a non-attention decoder.")
  
    with variable_scope.variable_scope(scope or "attention_RNN"):
      output_size = encoder_outputs[0].get_shape()[1].value
      top_states = [array_ops.reshape(e, [-1, 1, output_size])
                  for e in encoder_outputs]
      attention_states = array_ops.concat(top_states, 1)
      if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                       % attention_states.get_shape())
  
      batch_size = array_ops.shape(top_states[0])[0]  # Needed for reshaping.
      attn_length = attention_states.get_shape()[1].value
      attn_size = attention_states.get_shape()[2].value
  
      # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
      hidden = array_ops.reshape(
          attention_states, [-1, attn_length, 1, attn_size])
      hidden_features = []
      v = []
      attention_vec_size = attn_size  # Size of query vectors for attention.
      for a in xrange(num_heads):
        k = variable_scope.get_variable("AttnW_%d" % a,
                                        [1, 1, attn_size, attention_vec_size])
        hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
        v.append(variable_scope.get_variable("AttnV_%d" % a,
                                             [attention_vec_size]))
  
      def attention(query):
        """Put attention masks on hidden using hidden_features and query."""
        attn_weights = []
        ds = []  # Results of attention reads will be stored here.
        for i in xrange(num_heads):
          with variable_scope.variable_scope("Attention_%d" % i):
            y = rnn_cell._linear(query, attention_vec_size, True)
            y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
            # Attention mask is a softmax of v^T * tanh(...).
            s = math_ops.reduce_sum(
                v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
            a = nn_ops.softmax(s)
            attn_weights.append(a)
            # Now calculate the attention-weighted vector d.
            d = math_ops.reduce_sum(
                array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                [1, 2])
            ds.append(array_ops.reshape(d, [-1, attn_size]))
        return attn_weights, ds
  
      batch_attn_size = array_ops.stack([batch_size, attn_size])
      attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
               for _ in xrange(num_heads)]
      for a in attns:  # Ensure the second shape of attention vectors is set.
        a.set_shape([None, attn_size])
  
      # loop through the encoder_outputs
      attention_encoder_outputs = list()
      sequence_attention_weights = list()
      for i in xrange(len(encoder_outputs)):
        if i > 0:
          variable_scope.get_variable_scope().reuse_variables()
        if i == 0:
          with variable_scope.variable_scope("Initial_Decoder_Attention"):
            initial_state = rnn_cell._linear(encoder_state, output_size, True)
          attn_weights, ds = attention(initial_state)
        else:
          attn_weights, ds = attention(encoder_outputs[i])
        output = array_ops.concat([ds[0], encoder_outputs[i]], 1) # NOTE: here we temporarily assume num_head = 1
        with variable_scope.variable_scope("AttnRnnOutputProjection"):
          logit = rnn_cell._linear(output, num_decoder_symbols, True)
        attention_encoder_outputs.append(logit) # NOTE: here we temporarily assume num_head = 1
        sequence_attention_weights.append(attn_weights[0]) # NOTE: here we temporarily assume num_head = 1
  else:
    print ('Use the NON attention RNN model')
    with variable_scope.variable_scope(scope or "non-attention_RNN"):
      attention_encoder_outputs = list()
      sequence_attention_weights = list()
      
      # copy over logits once out of sequence_length
      if encoder_outputs[0].get_shape().ndims != 1:
        (fixed_batch_size, output_size) = encoder_outputs[0].get_shape().with_rank(2)
      else:
        fixed_batch_size = encoder_outputs[0].get_shape().with_rank_at_least(1)[0]

      if fixed_batch_size.value: 
        batch_size = fixed_batch_size.value
      else:
        batch_size = array_ops.shape(encoder_outputs[0])[0]
      if sequence_length is not None:
        sequence_length = math_ops.to_int32(sequence_length)
      if sequence_length is not None:  # Prepare variables
        zero_logit = array_ops.zeros(
            array_ops.stack([batch_size, num_decoder_symbols]), encoder_outputs[0].dtype)
        zero_logit.set_shape(
            tensor_shape.TensorShape([fixed_batch_size.value, num_decoder_symbols]))
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)
    
      for time, input_ in enumerate(encoder_outputs):
        if time > 0: variable_scope.get_variable_scope().reuse_variables()
        # pylint: disable=cell-var-from-loop
        # call_cell = lambda: cell(input_, state)
        generate_logit = lambda: rnn_cell._linear(encoder_outputs[time], num_decoder_symbols, True)
        # pylint: enable=cell-var-from-loop
        if sequence_length is not None:
          logit = _step(
              time, sequence_length, min_sequence_length, max_sequence_length, zero_logit, generate_logit)
        else:
          logit = generate_logit()
        attention_encoder_outputs.append(logit)   
        
  return attention_encoder_outputs, sequence_attention_weights
Code example #16
File: bilstm_seq2seq.py Project: yzx1992/tri
 def AttenOutputProject(_output,_vocab_size):
     with tf.variable_scope("AttnRnnOutputProjection"):
         _logit = rnn_cell._linear(_output, _vocab_size, True)  # Tensor shape: [batch_size, num_cls]
     return _logit
Code example #17
def rnn_with_output_feedback(cell,
                             inputs,
                             targets1,
                             targets1_num_symbols,
                             target1_emb_size,
                             target1_output_projection,
                             targets2,
                             targets2_num_symbols,
                             target2_emb_size,
                             target2_output_projection,
                             word_emb_size,
                             DNN_at_output,
                             zero_intent_thres=0,
                             sequence_length=None,
                             dtype=None,
                             train_with_true_label=True,
                             use_predicted_output=False):
    '''
    zero_intent_thres: int, the intent contribution to the context remains zero
                       before this threshold, and increases linearly to 1 after it.
    '''
    if not isinstance(cell, tf.contrib.rnn.RNNCell):
        raise TypeError("cell must be an instance of RNNCell")
    if not isinstance(inputs, list):
        raise TypeError("inputs must be a list")
    if not isinstance(targets1, list):
        raise TypeError("targets1 must be a list")
    if not isinstance(targets2, list):
        raise TypeError("targets2 must be a list")
    if not inputs:
        raise ValueError("inputs must not be empty")
    if not dtype:
        raise ValueError(
            "dtype must be provided, which is to used in defining intial RNN state"
        )

    encoder_outputs = []
    intent_embedding = variable_scope.get_variable(
        "intent_embedding", [targets1_num_symbols, target1_emb_size])
    tag_embedding = variable_scope.get_variable(
        "tag_embedding", [targets2_num_symbols, target2_emb_size])
    # use predicted label if use_predicted_output during inference, use true label during training
    # To choose to always use predicted label, disable the if condition
    intent_loop_function = _extract_argmax_and_embed(
        intent_embedding,
        DNN_at_output,
        target1_output_projection,
        forward_only=use_predicted_output)
    tagging_loop_function = _extract_argmax_and_embed(
        tag_embedding,
        DNN_at_output,
        target2_output_projection,
        forward_only=use_predicted_output)
    intent_targets = [
        array_ops.reshape(math_ops.to_int64(x), [-1]) for x in targets1
    ]
    intent_target_embeddings = [
        embedding_ops.embedding_lookup(intent_embedding, target)
        for target in intent_targets
    ]
    tag_targets = [
        array_ops.reshape(math_ops.to_int64(x), [-1]) for x in targets2
    ]
    tag_target_embeddings = [
        embedding_ops.embedding_lookup(tag_embedding, target)
        for target in tag_targets
    ]

    if inputs[0].get_shape().ndims != 1:
        (fixed_batch_size, input_size) = inputs[0].get_shape().with_rank(2)
        if input_size.value is None:
            raise ValueError(
                "Input size (second dimension of inputs[0]) must be accessible via "
                "shape inference, but saw value None.")
    else:
        fixed_batch_size = inputs[0].get_shape().with_rank_at_least(1)[0]

    if fixed_batch_size.value:
        batch_size = fixed_batch_size.value
    else:
        batch_size = array_ops.shape(inputs[0])[0]

    state = cell.zero_state(batch_size, dtype)
    zero_output = array_ops.zeros(
        array_ops.stack([batch_size, cell.output_size]), inputs[0].dtype)
    zero_output.set_shape(
        tensor_shape.TensorShape([fixed_batch_size.value, cell.output_size]))

    if sequence_length is not None:  # Prepare variables
        sequence_length = math_ops.to_int32(sequence_length)
        min_sequence_length = math_ops.reduce_min(sequence_length)
        max_sequence_length = math_ops.reduce_max(sequence_length)

    zero_intent_embedding = array_ops.zeros(
        array_ops.stack([batch_size, target1_emb_size]), inputs[0].dtype)
    zero_intent_embedding.set_shape(
        tensor_shape.TensorShape([fixed_batch_size.value, target1_emb_size]))
    zero_tag_embedding = array_ops.zeros(
        array_ops.stack([batch_size, target2_emb_size]), inputs[0].dtype)
    zero_tag_embedding.set_shape(
        tensor_shape.TensorShape([fixed_batch_size.value, target2_emb_size]))

    encoder_outputs = list()
    intent_logits = list()
    tagging_logits = list()
    sampled_intent_embeddings = list()
    sampled_tag_embeddings = list()

    for time, input_ in enumerate(inputs):
        # Bing: introduce output label embeddings as additional input
        # if feed_previous (during testing):
        #     Use loop_function
        # if NOT feed_previous (during training):
        #     Use true target embedding
        if time == 0:
            current_intent_embedding = zero_intent_embedding
            current_tag_embedding = zero_tag_embedding

        if time > 0: variable_scope.get_variable_scope().reuse_variables()

        # here we introduce a max(0, t - thres)/sequence_length intent weight
        thres = zero_intent_thres
        if time <= thres:
            intent_contribution = math_ops.to_float(0)
        else:
            intent_contribution = tf.div(math_ops.to_float(time - thres),
                                         math_ops.to_float(sequence_length))



        x = rnn_cell._linear([
            tf.transpose(
                tf.transpose(current_intent_embedding) * intent_contribution),
            current_tag_embedding, input_
        ], word_emb_size, True)
        call_cell = lambda: cell(x, state)

        # pylint: enable=cell-var-from-loop
        if sequence_length is not None:
            (output_fw,
             state) = rnn._rnn_step(time, sequence_length, min_sequence_length,
                                    max_sequence_length, zero_output, state,
                                    call_cell, cell.state_size)
        else:
            (output_fw, state) = call_cell()

        encoder_outputs.append(output_fw)

        if use_predicted_output:
            intent_logit, current_intent_embedding = intent_loop_function(
                output_fw, time)
            tagging_logit, current_tag_embedding = tagging_loop_function(
                output_fw, time)
        else:
            if train_with_true_label is True:
                intent_logit = multilayer_perceptron_with_initialized_W(
                    output_fw,
                    target1_output_projection,
                    forward_only=use_predicted_output)
                tagging_logit = multilayer_perceptron_with_initialized_W(
                    output_fw,
                    target2_output_projection,
                    forward_only=use_predicted_output)
                current_intent_embedding = intent_target_embeddings[time]
                current_tag_embedding = tag_target_embeddings[time]
            else:
                intent_logit, current_intent_embedding = intent_loop_function(
                    output_fw, time)
                tagging_logit, current_tag_embedding = tagging_loop_function(
                    output_fw, time)
            # prev_symbols.append(prev_symbol)
        if time == 0:
            current_intent_embedding = zero_intent_embedding
            current_tag_embedding = zero_tag_embedding
        sampled_intent_embeddings.append(current_intent_embedding)
        sampled_tag_embeddings.append(current_tag_embedding)

        intent_logits.append(intent_logit)
        tagging_logits.append(tagging_logit)

    return encoder_outputs, state, intent_logits, tagging_logits, sampled_intent_embeddings, sampled_tag_embeddings
Code example #18
        def attention(query):
            """
            Places an attention mask on hidden states from encoder
            using hidden and query. Query is a state of shape [N, H].
            """
            # results of the attention reads
            cs = [] # context vectors c_i

            # Flatten the query if it is a tuple (e.g. an LSTM state tuple)
            if nest.is_sequence(query):
                query_list = nest.flatten(query)
                query = tf.concat(1, query_list)  # [N, sum of part sizes]

            for i in range(num_heads):
                with tf.variable_scope("Attention_%d" % i) as scope:
                    y = _linear(
                        args=query, output_size=pre_attn_size, bias=True)

                    # Reshape into 4D
                    y = tf.reshape(y, [-1, 1, 1, pre_attn_size]) # [N, 1, 1, H]

                    # Calculating alpha
                    s = tf.reduce_sum(V1[i] * tf.nn.tanh(hidden_features_pre[i] + y), [2, 3])
Code example #19
File: speech.py Project: rdadolf/fathom-lite
 def __call__(self, inputs, state, scope=None):
   """Basic RNN: output = new_state = clipped_relu(W * input + U * state + B)."""
   with vs.variable_scope(scope or type(self).__name__):
     output = clipped_relu(_linear([inputs, state], self._num_units, True))
   return output, output
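
`clipped_relu` is defined elsewhere in the project; in the Deep Speech model this file follows, it is typically min(max(x, 0), clip) with a clip of 20. A NumPy sketch of that assumed definition:

import numpy as np

def clipped_relu(x, clip=20.0):
    # ReLU whose activations are capped at `clip`.
    return np.minimum(np.maximum(x, 0.0), clip)
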
Code example #20
    def __init__(self,
                 batch_size,
                 num_input,
                 num_hidden,
                 layer_depth,
                 rnn_type,
                 seq_length,
                 learning_rate,
                 keep_drop=0.5,
                 grad_clip=5.0,
                 is_training=False):
        self.num_input = num_input
        self.num_hidden = num_hidden
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.rnn_type = rnn_type
        self.layer_depth = layer_depth
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.is_training = is_training
        self.keep_drop = keep_drop
        self.x = tf.placeholder(tf.float32,
                                [batch_size, seq_length, self.num_input])

        # LSTM cells for encoder and decoder
        def create_cell():
            if rnn_type == "GRU":
                cell = rnn.GRUCell(num_hidden)
            elif rnn_type == "RAN":
                cell = RANCell(num_hidden,
                               normalize=tf.constant(self.is_training))
            cell = SwitchableDropoutWrapper(cell,
                                            output_keep_prob=self.keep_drop,
                                            is_train=tf.constant(
                                                self.is_training))
            return cell

        with tf.variable_scope(
                'encoder_cells',
                initializer=tf.contrib.layers.xavier_initializer()):
            self.enc_cell = rnn.DeviceWrapper(rnn.MultiRNNCell(
                [create_cell() for _ in range(layer_depth)]),
                                              device="/gpu:0")

        with tf.variable_scope(
                'decoder_cells',
                initializer=tf.contrib.layers.xavier_initializer()):
            self.dec_cell = rnn.DeviceWrapper(rnn.MultiRNNCell(
                [create_cell() for _ in range(layer_depth)]),
                                              device="/gpu:1")

        with tf.variable_scope('encoder'):
            outputs, _ = tf.nn.dynamic_rnn(cell=self.enc_cell,
                                           inputs=self.x,
                                           time_major=False,
                                           swap_memory=True,
                                           dtype=tf.float32)
            self.enc_output = outputs[:, -1, :]

        with tf.variable_scope('latent'):
            # reparametrization trick
            with tf.name_scope("Z"):
                self.z_mean = tf.contrib.layers.fully_connected(
                    inputs=self.enc_output,
                    num_outputs=num_hidden,
                    activation_fn=None,
                    scope="z_mean")
                self.z_stddev = tf.contrib.layers.fully_connected(
                    inputs=self.enc_output,
                    num_outputs=num_hidden,
                    activation_fn=tf.nn.softplus,
                    scope="z_ls2")

            # sample z from the latent distribution
            with tf.name_scope("z_samples"):
                with tf.name_scope('random_normal_sample'):
                    eps = tf.random_normal(
                        (batch_size, num_hidden), 0, 1,
                        dtype=tf.float32)  # draw a random number
                with tf.name_scope('z_sample'):
                    # z = mean + sigma * eps (z_stddev here holds the variance)
                    self.z = self.z_mean + tf.sqrt(self.z_stddev) * eps

        with tf.variable_scope('decoder'):
            reversed_inputs = tf.reverse(self.x, [1])
            flat_targets = tf.reshape(reversed_inputs, [-1])
            dec_first_inp = tf.nn.relu(_linear(self.z, self.num_input, True))

            # [GO, ...inputs]
            dec_inputs = tf.concat(
                (tf.expand_dims(dec_first_inp, 1), reversed_inputs[:, 1:, :]),
                1)
            self.w1 = tf.get_variable(
                "w1",
                shape=[self.num_hidden, self.num_input],
                initializer=tf.contrib.layers.xavier_initializer())
            self.b1 = tf.get_variable("b1",
                                      shape=[self.num_input],
                                      initializer=tf.constant_initializer(0.0))
            self.initial_state = self.dec_cell.zero_state(batch_size,
                                                          dtype=tf.float32)
            dec_outputs, _ = tf.nn.dynamic_rnn(
                cell=self.dec_cell,
                inputs=dec_inputs,
                initial_state=self.initial_state,
                time_major=False,
                swap_memory=True,
                dtype=tf.float32)
        logits = tf.matmul(tf.reshape(dec_outputs, [-1, self.num_hidden]),
                           self.w1) + self.b1
        self.reconstruction = tf.reshape(logits, [-1])
        self.reconstruction_loss = 0.5 * tf.reduce_mean(
            tf.pow(self.reconstruction - flat_targets, 2.0))
        self.latent_loss = -0.5 * (1.0 + tf.log(self.z_stddev) -
                                   tf.square(self.z_mean) - self.z_stddev)
        self.latent_loss = tf.reduce_sum(self.latent_loss, 1) / tf.cast(
            seq_length, tf.float32)
        self.latent_loss = tf.reduce_sum(self.latent_loss) / tf.cast(
            batch_size, tf.float32)
        self.cost = tf.reduce_mean(self.reconstruction_loss + self.latent_loss)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.grad_clip)
        optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=0.001)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
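
Note that despite its name, `z_stddev` is used as a variance here: it is square-rooted for sampling and appears as `tf.log(self.z_stddev)` in the KL term. A NumPy sketch of the reparametrization trick and the per-dimension KL this code implements:

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

z_mean = np.random.randn(3, 5)
z_var = softplus(np.random.randn(3, 5))   # positive, as the softplus head above

eps = np.random.randn(*z_mean.shape)      # draw a standard normal sample
z = z_mean + np.sqrt(z_var) * eps         # reparametrized sample

# KL(N(mean, var) || N(0, 1)) per latent dimension
kl = -0.5 * (1.0 + np.log(z_var) - z_mean ** 2 - z_var)
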
Code example #21
    def prepare_model(self):
        with tf.variable_scope("LSTMTDNN"):
            self.char_inputs = []
            self.word_inputs = []
            self.cnn_outputs = []

            if self.use_char:
                char_W = tf.get_variable(
                    "char_embed", [self.char_vocab_size, self.char_embed_dim])
            if self.use_word:
                word_W = tf.get_variable(
                    "word_embed", [self.word_vocab_size, self.word_embed_dim])

            with tf.variable_scope("CNN") as scope:
                self.char_inputs = tf.placeholder(
                    tf.int32,
                    [self.batch_size, self.seq_length, self.max_word_length])
                self.word_inputs = tf.placeholder(
                    tf.int32, [self.batch_size, self.seq_length])

                char_indices = tf.split(axis=1,
                                        num_or_size_splits=self.seq_length,
                                        value=self.char_inputs)
                word_indices = tf.split(axis=1,
                                        num_or_size_splits=self.seq_length,
                                        value=tf.expand_dims(
                                            self.word_inputs, -1))

                for idx in xrange(self.seq_length):
                    char_index = tf.reshape(char_indices[idx],
                                            [-1, self.max_word_length])
                    word_index = tf.reshape(word_indices[idx], [-1, 1])

                    if idx != 0:
                        scope.reuse_variables()

                    if self.use_char:
                        # [batch_size x word_max_length, char_embed]
                        char_embed = tf.nn.embedding_lookup(char_W, char_index)

                        char_cnn = TDNN(char_embed, self.char_embed_dim,
                                        self.feature_maps, self.kernels)

                        if self.use_word:
                            word_embed = tf.nn.embedding_lookup(
                                word_W, word_index)
                            cnn_output = tf.concat(axis=1,
                                                   values=[
                                                       char_cnn.output,
                                                       tf.squeeze(
                                                           word_embed, [1])
                                                   ])
                        else:
                            cnn_output = char_cnn.output
                    else:
                        cnn_output = tf.squeeze(
                            tf.nn.embedding_lookup(word_W, word_index))

                    if self.use_batch_norm:
                        bn = batch_norm()
                        norm_output = bn(
                            tf.expand_dims(tf.expand_dims(cnn_output, 1), 1))
                        cnn_output = tf.squeeze(norm_output)

                    if self.highway_layers > 0:
                        #cnn_output = highway(input_, input_dim_length, self.highway_layers, 0)
                        cnn_output = highway(cnn_output,
                                             cnn_output.get_shape()[1],
                                             self.highway_layers, 0)

                    self.cnn_outputs.append(cnn_output)

            with tf.variable_scope("LSTM") as scope:
                self.cell = tf.contrib.rnn.BasicLSTMCell(self.rnn_size)
                self.stacked_cell = tf.contrib.rnn.MultiRNNCell(
                    [self.cell] * self.layer_depth)

                outputs, _ = tf.contrib.rnn.static_rnn(self.stacked_cell,
                                                       self.cnn_outputs,
                                                       dtype=tf.float32)

                self.lstm_outputs = []
                self.true_outputs = tf.placeholder(
                    tf.int64, [self.batch_size, self.seq_length])

                loss = 0
                true_outputs = tf.split(axis=1,
                                        num_or_size_splits=self.seq_length,
                                        value=self.true_outputs)

                for idx, (top_h,
                          true_output) in enumerate(zip(outputs,
                                                        true_outputs)):
                    if self.dropout_prob > 0:
                        top_h = tf.nn.dropout(top_h, self.dropout_prob)

                    if self.hsm > 0:
                        self.lstm_outputs.append(top_h)
                    else:
                        if idx != 0:
                            scope.reuse_variables()
                        proj = _linear(top_h, self.word_vocab_size, 0)
                        self.lstm_outputs.append(proj)

                    loss += tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=self.lstm_outputs[idx],
                        labels=tf.squeeze(true_output))

                self.loss = tf.reduce_mean(loss) / self.seq_length
                tf.summary.scalar("loss", self.loss)
                tf.summary.scalar("perplexity", tf.exp(self.loss))
Code example #22
def attention_decoder(decoder_inputs, initial_state, attention_states,
    cell, output_size, loop_function=None, dtype=None,
    scope=None):
    """
    Decoder with attention mechanism
    args:
        decoder_inputs: The inputs to the decoder, either the targets during training or the previous decoder output during inference.
        initial_state: The tensor used to initialize the first decoder step cell.
        attention_states: The encoder hidden states on which the decoder is supposed to attend to.
        cell: The decoder cell returned by the rnn_cell function.
        output_size: The number of decoder hidden state units.
        loop_function: The function that embeds the previous decoder step's output and provides as input to next decoder step
        dtype: the data type
        scope: the scope of the attention decoder
    
    """

    with tf.variable_scope(scope or 'attention_decoder', dtype=dtype) as scope:

        dtype = scope.dtype
        batch_size = tf.shape(decoder_inputs[0])[0] 
        attn_length = attention_states.get_shape()[1].value 
        if attn_length == None:
            attn_length = tf.shape(attention_states)[1]
        attn_size = attention_states.get_shape()[2].value

        hidden = tf.reshape(attention_states,
            [-1, attn_length, 1, attn_size]) 

        k = tf.get_variable("AttnW",
                [1, 1, attn_size, attn_size]) 
        hidden_features=tf.nn.conv2d(hidden, k, [1,1,1,1], "SAME")
        attention_softmax_weights=tf.get_variable(
                "W_attention_softmax", [attn_size])

        state = initial_state[0]

        def attention(query):
            """Compute the attention read for the current decoder state."""
            # Flatten the query if it is a tuple (e.g. an LSTM state tuple).
            if nest.is_sequence(query):
                query = tf.concat(nest.flatten(query), 1)

            with tf.variable_scope("Attention"):
                y = _linear(args=query, output_size=attn_size, bias=True)
                y = tf.reshape(y, [-1, 1, 1, attn_size])

                s = tf.reduce_sum(
                    attention_softmax_weights *
                    tf.nn.tanh(hidden_features + y), [2, 3])
                a = tf.nn.softmax(s)

                c = tf.reduce_sum(
                    tf.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
                cs = tf.reshape(c, [-1, attn_size])
            return cs, a

        outputs = []
        prev = None
        batch_attn_size = tf.stack([batch_size, attn_size])
        attns = tf.zeros(batch_attn_size, dtype=dtype)
        attns.set_shape([None, attn_size])
        
        wts_l = []
        for i, inp in enumerate(decoder_inputs):

            if i > 0:
                tf.get_variable_scope().reuse_variables()

            if loop_function is not None and prev is not None:
                with tf.variable_scope("loop_function", reuse=True):
                    inp = loop_function(prev, i)
            input_size = inp.get_shape().with_rank(2)[1]
            
            # project decoder inputs and context vector to decoder input size
            x = _linear(args=[inp, attns], output_size=input_size, bias=True)
            
            # run a decoder step
            cell_outputs, state = cell(x, state)

            attns,wts = attention([state])
            wts_l.append(wts)
            
            # project the decoder outputs and context vector to decoder output size
            with tf.variable_scope('attention_output_projection'):
                output = _linear(args=[cell_outputs, attns],
                                 output_size=output_size, bias=True)
            if loop_function is not None:
                prev = output
            outputs.append(output)

        return outputs, state, wts_l
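
The decode loop above is the standard input-feeding pattern: each step projects the current input together with the previous attention read, runs the cell, recomputes attention from the new state, and projects the output together with the fresh read. Schematically, with a stub cell and hypothetical sizes:

import numpy as np

def cell(x, state):                      # stub RNN cell
    new_state = np.tanh(x + state)
    return new_state, new_state

num_steps, batch, size = 3, 2, 8
W_in, W_out = np.random.randn(2 * size, size), np.random.randn(2 * size, size)
attns = np.zeros((batch, size))          # initial attention read
state = np.zeros((batch, size))
for inp in np.random.randn(num_steps, batch, size):
    x = np.concatenate([inp, attns], 1) @ W_in        # project [input; read]
    out, state = cell(x, state)
    attns = np.tanh(state)                            # stands in for attention(state)
    output = np.concatenate([out, attns], 1) @ W_out  # project [output; read]
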