Example No. 1
    def __call__(self, inputs, state, name=None):

        with tf.variable_scope(tf.get_variable_scope()):
            # print(inputs)
            batch_size = self.h.get_shape().as_list()[0]
            seq_len = tf.shape(self.h)[1]
            emb_size = self.h.get_shape().as_list()[2]  #tf.shape(self.h)[2]
            flat_h = tf.reshape(self.h, [-1, emb_size])  #flatten h
            bs_times_seqlen = flat_h.get_shape().as_list()[
                0]  #tf.shape(flat_h)[0]
            tile_state = tf.tile(state, [seq_len, 1])
            '''The two linear layers must be kept in separate variable scopes because each _linear call creates its own "kernel" variable.'''
            with tf.variable_scope("val"):
                val = self._activation(
                    _linear([tile_state, flat_h], self.state_size, False))
            with tf.variable_scope("s"):
                s = _linear([val], 1, False)
                s = tf.reshape(s, [batch_size, -1])  #[batch_size, seq_len]
                a = tf.nn.softmax(s, 1)  #[batch_size, seq_length]
                a = tf.reshape(a, [-1])
                flat_h = tf.transpose(flat_h)
                cont = flat_h * a
                cont = tf.transpose(cont)
                cont = tf.reshape(cont, [batch_size, -1, emb_size])
                cont = tf.reduce_sum(cont, 1)  #[batch_size, emb_size]
                new_inputs = tf.concat([inputs, cont], 1)
                print('success')
            return self._cell(new_inputs, state)
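Every example in this collection calls the private TF 1.x helper `_linear` (from `tensorflow.python.ops.rnn_cell_impl`). As a reading aid, here is a minimal sketch of what that helper computes: one weight matrix applied to the concatenation of its inputs along axis 1, plus an optional bias. The names and scope below are illustrative, not the library's exact implementation.

import tensorflow as tf

def linear_sketch(args, output_size, bias, bias_initializer=None,
                  kernel_initializer=None):
    """Roughly what _linear does: matmul(concat(args, 1), kernel) (+ bias)."""
    if not isinstance(args, (list, tuple)):
        args = [args]
    total_arg_size = sum(a.get_shape().as_list()[1] for a in args)
    dtype = args[0].dtype
    with tf.variable_scope("linear_sketch"):
        kernel = tf.get_variable("kernel", [total_arg_size, output_size],
                                 dtype=dtype, initializer=kernel_initializer)
        x = args[0] if len(args) == 1 else tf.concat(args, 1)
        res = tf.matmul(x, kernel)
        if bias:
            if bias_initializer is None:
                bias_initializer = tf.constant_initializer(0.0, dtype=dtype)
            b = tf.get_variable("bias", [output_size], dtype=dtype,
                                initializer=bias_initializer)
            res = tf.nn.bias_add(res, b)
    return res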
Example No. 2
 def call(self, inputs, state):
     """Gated recurrent unit (GRU) with nunits cells."""
     # inputs = real inputs + m + rt (concatenated along the feature axis)
     # rt's length is self._num_units
     # state = rt * previous state
     # the new input is the first two parts
     totalLength = inputs.get_shape().as_list()[1]
     inputs_ = inputs[:, 0:totalLength - self._num_units]
     rth = inputs[:, totalLength - self._num_units:]
     inputs = inputs_
     state = math_ops.multiply(rth, state)
     with vs.variable_scope("gates"):  # Reset gate and update gate.
         # We start with bias of 1.0 to not reset and not update.
         bias_ones = self._bias_initializer
         if self._bias_initializer is None:
             dtype = [a.dtype for a in [inputs, state]][0]
             bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
         value = math_ops.sigmoid(
             _linear([inputs, state], 2 * self._num_units, True, bias_ones,
                     self._kernel_initializer))
         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
     with vs.variable_scope("candidate"):
         c = self._activation(
             _linear([inputs, r * state], self._num_units, True,
                     self._bias_initializer, self._kernel_initializer))
     new_h = u * state + (1 - u) * c
     return new_h, new_h
Example No. 3
 def call(self, inputs=None, state=None):
     """Gated recurrent unit (GRU) with nunits cells."""
     a_, c_ = tf_ops.get_max_pooling(self._num_units, self.contexts, inputs,
                                     state)
     with vs.variable_scope("sigmoid_gate"):
         if self.concat_context:
             inputs = tf.concat([inputs, c_], 1)
             g_ = tf.nn.sigmoid(
                 _linear([inputs], self._num_units * 2, False))
         else:
             inputs = c_
             g_ = tf.nn.sigmoid(_linear([inputs], self._num_units, False))
     inputs = tf.multiply(inputs, g_)
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
         dtype = [a.dtype for a in [inputs, state]][0]
         bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     value = math_ops.sigmoid(
         _linear([inputs, state], 2 * self._num_units, True, bias_ones,
                 self._kernel_initializer))
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
     with vs.variable_scope("candidate"):
         c = self._activation(
             _linear([inputs, r * state], self._num_units, True,
                     self._bias_initializer, self._kernel_initializer))
     new_h = u * state + (1 - u) * c
     if self.result_type == 'pred':
         outputs = a_
     else:
         outputs = new_h
     return outputs, new_h
Example No. 4
 def call(self, inputs, state):
     """Gated recurrent unit (GRU) with nunits cells."""
     with vs.variable_scope("gates"):  # Reset gate and update gate.
         # We start with bias of 1.0 to not reset and not update.
         bias_ones = self._bias_initializer
         if self._bias_initializer is None:
             dtype = inputs.dtype
             bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
         # pylint: disable=protected-access
         value = math_ops.sigmoid(
             rnn_cell_impl._linear([inputs, state], 2 * self._num_units,
                                   True, bias_ones,
                                   self._kernel_initializer))
         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
         # pylint: enable=protected-access
     with vs.variable_scope("candidate"):
         # pylint: disable=protected-access
         with vs.variable_scope("input_projection"):
             hi = rnn_cell_impl._linear(inputs, self._num_units, True,
                                        self._bias_initializer,
                                        self._kernel_initializer)
         with vs.variable_scope("hidden_projection"):
             hh = r * (rnn_cell_impl._linear(state, self._num_units, True,
                                             self._bias_initializer,
                                             self._kernel_initializer))
         # pylint: enable=protected-access
         c = self._activation(hi + hh)
     new_h = u * state + (1 - u) * c
     return new_h, new_h
Example No. 5
 def call(self, inputs, state):
     with vs.variable_scope("gates"):
         bias_ones = self._bias_initializer
         if self._bias_initializer is None:
             dtype = [a.dtype for a in [inputs, state]][0]
             bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
         value = _linear([inputs, state], 2 * self._hidden_size, True,
                         bias_ones, aux.rum_ortho_initializer())
         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
         u = sigmoid(u)
         if self._use_layer_norm:
             concat = tf.concat([r, u], 1)
             concat = aux.layer_norm_all(concat, 2, self._hidden_size,
                                         "LN_r_u")
             r, u = tf.split(concat, 2, 1)
     with vs.variable_scope("candidate"):
         x_emb = _linear(inputs, self._hidden_size, True,
                         self._bias_initializer, self._kernel_initializer)
         state_new = rotate(x_emb, r, state)
         if self._use_layer_norm:
             c = self._activation(aux.layer_norm(x_emb + state_new, "LN_c"))
         else:
             c = self._activation(x_emb + state_new)
     new_h = u * state + (1 - u) * c
     if self._T_norm is not None:
         new_h = tf.nn.l2_normalize(new_h, 1,
                                    epsilon=self._eps) * self._T_norm
     if self._use_zoneout:
         new_h = aux.rum_zoneout(new_h, state, self._zoneout_keep_h,
                                 self._is_training)
     return new_h, new_h
Example No. 6
 def call(self, inputs, state):
   """Gated recurrent unit (GRU) with nunits cells."""
   with vs.variable_scope("gates"):  # Reset gate and update gate.
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
       dtype = inputs.dtype
       bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     # pylint: disable=protected-access
     value = math_ops.sigmoid(
         rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True,
                               bias_ones, self._kernel_initializer))
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
     # pylint: enable=protected-access
   with vs.variable_scope("candidate"):
     # pylint: disable=protected-access
     with vs.variable_scope("input_projection"):
       hi = rnn_cell_impl._linear(inputs, self._num_units, True,
                                  self._bias_initializer,
                                  self._kernel_initializer)
     with vs.variable_scope("hidden_projection"):
       hh = r * (rnn_cell_impl._linear(state, self._num_units, True,
                                       self._bias_initializer,
                                       self._kernel_initializer))
     # pylint: enable=protected-access
     c = self._activation(hi + hh)
   new_h = u * state + (1 - u) * c
   return new_h, new_h
Example No. 7
    def hyper_bias(self, layer, hyper_output, embedding_size, num_units,
                   scope="hyper"):
        with tf.variable_scope(scope):
            with tf.variable_scope('zb'):
                zb = _linear(hyper_output, embedding_size, False)
            with tf.variable_scope('beta'):
                beta = _linear(zb, num_units, False)

        return layer + beta
Example No. 8
    def __call__(self, inputs, state, scope=None):
        """Run one step of LRU.
          Args:
            inputs: input Tensor, 2D, batch x num_units.
            state: a state Tensor, `2-D, batch x state_size`. 
          Returns:
            A tuple containing:
            - A `2-D, [batch x num_units]`, Tensor representing the output of the
              LRU after reading `inputs` when previous state was `state`.
            - A `2-D, [batch x num_units]`, Tensor representing the new state of LRU after reading `inputs` when
              the previous state was `state`.  Same type and shape(s) as `state`.
          Raises:
            ValueError: 
            - If input size cannot be inferred from inputs via
              static shape inference.
            - If state is not `2D`.
        """
        if inputs.get_shape()[1] != self._num_units:
            with tf.variable_scope("input_transformation"):  
                W = tf.get_variable("kernel", [inputs.get_shape()[1], self._num_units], initializer = self._kernel_initializer)
                inputs = tf.matmul(inputs, W)

        ## r_1, r_2, z_1 and z_2  update & reset gates (resp. eq. 11, 12, 15 & 16)
        with tf.variable_scope("gates"):  
            # We start with bias of 1.0 to not reset and not update.
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
              dtype = [a.dtype for a in [inputs, state]][0]
              bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
            value = math_ops.sigmoid(
                _linear([inputs, state], 4 * self._num_units, True, bias_ones,
                        self._kernel_initializer))
            r1, r2, z1, z2 = array_ops.split(value=value, num_or_size_splits=4, axis=1)

        ## h1_hat
        with tf.variable_scope("projected_state1"):
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
              dtype = [a.dtype for a in [inputs, state]][0]
              bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
            h1_hat = tf.tanh(
                _linear([inputs, r2 * state], self._num_units, True, bias_ones,
                        self._kernel_initializer))
        ## h2_hat
        with tf.variable_scope("projected_state2"):
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
              dtype = [a.dtype for a in [inputs, state]][0]
              bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
            h2_hat = tf.tanh(
                _linear([r1 * inputs, state], self._num_units, True, bias_ones,
                        self._kernel_initializer))

        h1_prime = z1 * h2_hat + (1 - z1) * inputs
        h2_prime = z2 * h1_hat + (1 - z2) * state

        return h1_prime, h2_prime
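Reading the code above directly, the LRU step can be summarized as follows ($\sigma$ is the sigmoid, $\odot$ elementwise multiplication, and the $W$'s are the kernels created by the three `_linear` calls):

\begin{aligned}
[r_1,\,r_2,\,z_1,\,z_2] &= \sigma\big(W_g\,[x_t;\,h_{t-1}] + b_g\big) \\
\hat{h}_1 &= \tanh\big(W_1\,[x_t;\,r_2 \odot h_{t-1}] + b_1\big) \\
\hat{h}_2 &= \tanh\big(W_2\,[r_1 \odot x_t;\,h_{t-1}] + b_2\big) \\
h'_1 &= z_1 \odot \hat{h}_2 + (1 - z_1) \odot x_t \\
h'_2 &= z_2 \odot \hat{h}_1 + (1 - z_2) \odot h_{t-1}
\end{aligned}

where $h'_1$ is returned as the output and $h'_2$ as the new state.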
Example No. 9
def encode(input_embeds):
    '''Encoder'''
    with tf.variable_scope("h"):
        h = tf.nn.tanh(_linear(input_embeds, hp.M * hp.K // 2, True))
    with tf.variable_scope("logits"):
        logits = _linear(h, hp.M * hp.K, True)
        logits = tf.log(tf.nn.softplus(logits) + 1e-8)
    logits = tf.reshape(logits, [-1, hp.M, hp.K], name="logits")
    return logits
Example No. 10
 def _encode(self, input_matrix, word_ids, embed_size):
     input_embeds = tf.nn.embedding_lookup(input_matrix, word_ids, name="input_embeds")
     with tf.variable_scope("h"):
         h = tf.nn.tanh(_linear(input_embeds, self.M * self.K // 2, True))
     with tf.variable_scope("logits"):
         logits = _linear(h, self.M * self.K, True)
         logits = tf.log(tf.nn.softplus(logits) + 1e-8)
     logits = tf.reshape(logits, [-1, self.M, self.K], name="logits")
     return input_embeds, logits
Example No. 11
 def __call__(self, inputs, state, scope=None):
     gru_out, gru_state = super(GRUCellAttn, self).__call__(inputs, state, scope)
     with vs.variable_scope(scope or type(self).__name__):
         with vs.variable_scope("Attn2"):
             gamma_h = tanh(rnn_cell_impl._linear(gru_out, self._num_units, False))
         weights = tf.reduce_sum(self.phi_hs * gamma_h, reduction_indices=2, keep_dims=True)
         weights = tf.exp(weights - tf.reduce_max(weights, reduction_indices=0, keep_dims=True))
         weights = weights / (1e-6 + tf.reduce_sum(weights, reduction_indices=0, keep_dims=True))
         context = tf.reduce_sum(self.hs * weights, reduction_indices=0)
         with vs.variable_scope("AttnConcat"):
             out = tf.nn.relu(rnn_cell_impl._linear([context, gru_out], self._num_units, False))
         self.attn_map = tf.squeeze(tf.slice(weights, [0, 0, 0], [-1, -1, 1]))
         return (out, out)
Example No. 12
    def call(self, inputs, state):
        #extract the associative memory and the state
        size_batch = tf.shape(state)[0]
        assoc_mem, state = tf.split(
            state, [self._hidden_size * self._hidden_size, self._hidden_size],
            1)
        assoc_mem = tf.reshape(
            assoc_mem, [size_batch, self._hidden_size, self._hidden_size])

        with vs.variable_scope("gates"):
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                dtype = [a.dtype for a in [inputs, state]][0]
                bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
            value = _linear([inputs, state], 2 * self._hidden_size, True,
                            bias_ones, aux.rum_ortho_initializer())
            r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
            u = sigmoid(u)
            if self._use_layer_norm:
                concat = tf.concat([r, u], 1)
                concat = aux.layer_norm_all(concat, 2, self._hidden_size,
                                            "LN_r_u")
                r, u = tf.split(concat, 2, 1)
        with vs.variable_scope("candidate"):
            x_emb = _linear(inputs, self._hidden_size, True,
                            self._bias_initializer, self._kernel_initializer)
            tmp_rotation = rotation_operator(x_emb, r, self._hidden_size)
            Rt = tf.matmul(assoc_mem, tmp_rotation)

            state_new = tf.reshape(
                tf.matmul(
                    Rt, tf.reshape(state, [size_batch, self._hidden_size, 1])),
                [size_batch, self._hidden_size])
            if self._use_layer_norm:
                c = self._activation(aux.layer_norm(x_emb + state_new, "LN_c"))
            else:
                c = self._activation(x_emb + state_new)
        new_h = u * state + (1 - u) * c
        if self._T_norm is not None:
            new_h = tf.nn.l2_normalize(new_h, 1,
                                       epsilon=self._eps) * self._T_norm
        if self._use_zoneout:
            new_h = aux.rum_zoneout(new_h, state, self._zoneout_keep_h,
                                    self._is_training)

        Rt = tf.reshape(Rt,
                        [size_batch, self._hidden_size * self._hidden_size])
        new_state = tf.concat([Rt, new_h], 1)
        return new_h, new_state
Example No. 13
  def call(self, inputs, state):
    """Long short-term memory cell (LSTM).
    Args:
      inputs: `2-D` tensor with shape `[batch_size x input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size x self.state_size]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size x 2 * self.state_size]`.
    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
    sigmoid = math_ops.sigmoid
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    concat = _linear([inputs, h], 4 * self._num_units, True)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)

    new_c = (
        c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j))
    new_h = self._activation(new_c) * sigmoid(o)

    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state
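As a usage note, cells like the one above are normally not called directly; they are handed to an RNN driver. A hypothetical TF 1.x sketch follows (the placeholder shapes and the stand-in BasicLSTMCell are assumptions for illustration only):

import tensorflow as tf

batch_size, max_time, input_size, num_units = 32, 20, 128, 256
inputs = tf.placeholder(tf.float32, [batch_size, max_time, input_size])
cell = tf.contrib.rnn.BasicLSTMCell(num_units)  # stand-in for the custom cell above
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
# outputs: [batch_size, max_time, num_units]
# final_state: LSTMStateTuple(c, h), each [batch_size, num_units]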
Example No. 14
 def call(self, input, state):
     """Cached LSTM
         input, [N, dw]
         state, [N, k*d]
     """
     sigmoid = tf.nn.sigmoid
     if self._state_is_tuple:
         state, c = state
     N = tf.shape(state)[0]
     k = self._num_groups
     d = self._num_units // k
     state = list(tf.split(state, k, axis=1))  # k tensors of shape [N, d]
     ro = rnn_cell_impl._linear([input] + state, 3*k*d, True, self._bias_initializer, self._kernel_initializer) # [N, 3*k*d]
     r, o, c = tf.split(ro, 3, axis=1) # [N, k*d]
     r = sigmoid(r) # [N, k*d]
     r = tf.add(tf.divide(tf.reshape(r, [N, k, d]), k), \
                 tf.tile(tf.expand_dims(tf.expand_dims(tf.range(0, 1, delta=1.0/float(k), dtype=tf.float32), 0), 2), [N, 1, d]))
     # r = tf.add(tf.divide(tf.reshape(r, [N, k, d]), k), tf.range(0, 1, delta=1.0/float(k), dtype=tf.float32))
     r = tf.reshape(r, [N, k*d])
     o = sigmoid(o)
     c_ = self._activation(c)
     new_c = (1 - r) * c + r * c_
     new_state = self._activation(new_c) * o
     if self._state_is_tuple:
         new_state = LSTMStateTuple(new_state, new_c)
     return new_state, new_state
Example No. 15
 def call(self, input, state):
     d = self._num_units
     sigmoid = tf.nn.sigmoid
     if self._state_is_tuple:
         c_tm1, h_tm1 = state
     with tf.variable_scope('input'):
         input_ = rnn_cell_impl._linear(input, d, False, self._bias_initializer, self._kernel_initializer)
     with tf.variable_scope('fr'):
         fr = rnn_cell_impl._linear(input_, 2*d, True, self._bias_initializer, self._kernel_initializer)
     fr = sigmoid(fr)
     f, r = tf.split(fr, 2, axis=1) # [N, d]
     c_t = f * c_tm1 + (1 - f) * input
     h_t = r * self._activation(c_t) + (1 - r) * input_
     if self._state_is_tuple:
         new_state = LSTMStateTuple(c_t, h_t)
     return h_t, new_state
Example No. 16
 def attention(query, use_attention=False):
     """Put attention masks on hidden using hidden_features and query."""
     attn_weights = []
     ds = []  # Results of attention reads will be stored here.
     for i in xrange(num_heads):
         with variable_scope.variable_scope("Attention_%d" % i):
             y = rnn_cell_impl._linear(query, attention_vec_size, True)
             y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
             # Attention mask is a softmax of v^T * tanh(...).
             s = math_ops.reduce_sum(
                 v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
             if use_attention is False:  # apply mean pooling
                  weights = tf.tile(sequence_length,
                                    tf.stack([attn_length]))
                 weights = array_ops.reshape(weights, tf.shape(s))
                 a = array_ops.ones(
                     tf.shape(s),
                     dtype=dtype) / math_ops.to_float(weights)
                 # a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(tf.shape(s)[1])
             else:
                 a = nn_ops.softmax(s)
             attn_weights.append(a)
             # Now calculate the attention-weighted vector d.
             d = math_ops.reduce_sum(
                 array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                 [1, 2])
             ds.append(array_ops.reshape(d, [-1, attn_size]))
     return attn_weights, ds
Example No. 17
 def __call__(self, inputs, state, scope=None):
     with tf.variable_scope(scope or "gru_cell", reuse=self._reuse):
         #We start with bias of 1.0 to not reset and not update.
         #todo: implement the new_h calculation given inputs and state
         with tf.variable_scope("Gates"):  # Reset gate and update gate.
             # We start with bias of 1.0 to not reset and not update.
             concated = _linear([inputs, state], 2 * self._num_units, True,
                                init_ops.constant_initializer(
                                    1.0, dtype=tf.float32))
             r, u = array_ops.split(concated, 2, 1)
             r, u = tf.sigmoid(r), tf.sigmoid(u)
         with tf.variable_scope("Candidate"):
             c = self._activation(
                 _linear([inputs, r * state], self._num_units, True))
         new_h = u * state + (1 - u) * c
     return new_h, new_h
Example No. 18
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).
        @param: inputs (batch,n)
        @param state: the states and hidden unit of the two cells
        """
        with tf.variable_scope(scope or type(self).__name__):
            c1, c2, h1, h2 = state

            # change bias argument to False since LN will add bias via shift
            concat = _linear([inputs, h1, h2], 5 * self._num_units, False)

            i, j, f1, f2, o = tf.split(value=concat,
                                       num_or_size_splits=5,
                                       axis=1)

            # add layer normalization to each gate
            i = ln(i, scope='i/')
            j = ln(j, scope='j/')
            f1 = ln(f1, scope='f1/')
            f2 = ln(f2, scope='f2/')
            o = ln(o, scope='o/')

            new_c = (c1 * tf.nn.sigmoid(f1 + self._forget_bias) +
                     c2 * tf.nn.sigmoid(f2 + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))

            # add layer_normalization in calculation of new hidden state
            new_h = self._activation(ln(new_c,
                                        scope='new_h/')) * tf.nn.sigmoid(o)
            new_state = LSTMStateTuple(new_c, new_h)

            return new_h, new_state
Example No. 19
    def downscale(self, inp, mask):
        """
        Reshape inp and mask in order to build the Pyramid Structure RNN.
        :param inp: shape()
        :param mask: shape()
        :return: out: shape()
                 mask: shape()
        """
        # return inp, mask

        with vs.variable_scope("Downscale"):
            inshape = tf.shape(inp)
            T, batch_size, dim = inshape[0], inshape[1], inshape[2]
            inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]),
                               [-1, 2 * self.size])  # last dim becomes 2 * size
            out2d = rnn_cell_impl._linear(inp2d, self.size, True)  # project down to size
            out3d = tf.reshape(out2d,
                               tf.stack((batch_size, tf.to_int32(T / 2), dim)))
            out3d = tf.transpose(out3d, perm=[1, 0, 2])
            out3d.set_shape([None, None, self.size])
            out = tanh(out3d)

            mask = tf.transpose(mask)
            mask = tf.reshape(mask, [-1, 2])
            mask = tf.cast(mask, tf.bool)  # cast dtype to tf.bool (Boolean)
            mask = tf.reduce_any(mask, reduction_indices=1)
            mask = tf.to_int32(mask)
            mask = tf.reshape(mask, tf.stack([batch_size, -1]))
            mask = tf.transpose(mask)
        return out, mask
Example No. 20
def linear(args,
           output_size,
           bias,
           bias_start=0.0,
           scope=None,
           squeeze=False,
           wd=0.0,
           input_keep_prob=1.0,
           is_train=None):
    with K.tf.variable_scope(scope or "linear"):
        if args is None or (nest.is_sequence(args) and not args):
            raise ValueError("`args` must be specified")
        if not nest.is_sequence(args):
            args = [args]

        flat_args = [flatten(arg, 1) for arg in args]
        # if input_keep_prob < 1.0:
        assert is_train is not None
        # flat_args = [K.tf.cond(is_train, lambda: K.tf.nn.dropout(arg, input_keep_prob), lambda: arg)
        #              for arg in flat_args]
        flat_args = [
            K.tf.nn.dropout(arg, input_keep_prob) for arg in flat_args
        ]
        flat_out = _linear(flat_args, output_size, bias)
        # flat_out = K.python.ops.rnn_cell._linear(flat_args, output_size, bias)
        out = reconstruct(flat_out, args[0], 1)
        if squeeze:
            out = K.tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
        # if wd:
        #     add_wd(wd)
    return out
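The `flatten` and `reconstruct` helpers used in Examples No. 20, 21 and 37 are not defined in these snippets. A minimal sketch of what they are assumed to do: collapse every dimension except the last `keep` ones so that `_linear` sees a 2-D tensor, then restore the leading dimensions of a reference tensor afterwards.

import tensorflow as tf

def flatten(tensor, keep):
    """Collapse all but the last `keep` dims: [..., d1, ..., dk] -> [-1, d1, ..., dk]."""
    static = tensor.get_shape().as_list()
    tail = [static[i] or tf.shape(tensor)[i]
            for i in range(len(static) - keep, len(static))]
    return tf.reshape(tensor, tf.stack([-1] + tail))

def reconstruct(tensor, ref, keep):
    """Give `tensor` the leading dims of `ref` while keeping its own last `keep` dims."""
    ref_static = ref.get_shape().as_list()
    lead = [ref_static[i] or tf.shape(ref)[i]
            for i in range(len(ref_static) - keep)]
    static = tensor.get_shape().as_list()
    tail = [static[i] or tf.shape(tensor)[i]
            for i in range(len(static) - keep, len(static))]
    return tf.reshape(tensor, tf.stack(lead + tail))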
Example No. 21
def linear(args,
           output_size,
           bias,
           bias_start=0.0,
           scope=None,
           squeeze=False,
           wd=0.0,
           input_keep_prob=1.0,
           is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [
            tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob),
                    lambda: arg) for arg in flat_args
        ]
    with tf.variable_scope(scope or 'Linear'):
        flat_out = _linear(
            flat_args,
            output_size,
            bias,
            bias_initializer=tf.constant_initializer(bias_start))
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    if wd:
        add_wd(wd)

    return out
Example No. 22
    def call(self, inputs, state):
        # inputs: [batch_size, in_size]
        # state:  [batch_size, output_size OR state_size]
        with tf.variable_scope("attention"):
            with tf.variable_scope("main_input"):
                # [batch_size, 1, att_size]
                att_main_in = tf.expand_dims(tf.einsum('ij,jk->ik', inputs,
                                                       self._WP),
                                             axis=1)

            with tf.variable_scope("s"):
                # [batch_size, num_match_elems, att_size]
                raw_in = tf.add(att_main_in, self._att_match_input)
                # [batch_size, num_match_elems, 1]
                s = tf.einsum('ijk,k->ij', tf.nn.tanh(raw_in), self._v)

            # [batch_size, num_match_elems]
            a = tf.nn.softmax(s, dim=1)
            # [batch_size, match_size]
            c = tf.reduce_sum(tf.multiply(tf.expand_dims(a, axis=2),
                                          self._match_input),
                              axis=1)

        raw_rnn_inputs = tf.concat([inputs, c], axis=1)

        with tf.variable_scope("pre_input_gate"):
            rnn_input_size = int(raw_rnn_inputs.get_shape()[1])
            rnn_input_gate = tf.sigmoid(
                _linear([raw_rnn_inputs], rnn_input_size, False))
            rnn_inputs = tf.multiply(raw_rnn_inputs, rnn_input_gate)

        new_h, new_h = self._base_cell.call(inputs=rnn_inputs, state=state)
        return new_h, new_h
Example No. 23
    def __call__(self, inputs, state, scope=None):

        sigmoid = math_ops.sigmoid
        tanh = math_ops.tanh

        with tf.variable_scope(scope or type(self).__name__):
            with tf.variable_scope("r"):
                r = sigmoid(_linear([inputs, state], self._num_units, True))
            with tf.variable_scope("z"):
                z = sigmoid(_linear([inputs, state], self._num_units, True))
            with tf.variable_scope("h_tilde"):
                h_tilde = tanh(
                    _linear([inputs, r * state], self._num_units, True))
            new_h = (z * state) + ((1 - z) * h_tilde)

        return new_h, new_h
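For reference, the update implemented by this cell (and, with `u` in place of `z`, by the other GRU variants in this collection) is the standard GRU. In LaTeX form, with $\sigma$ the sigmoid and $\odot$ elementwise multiplication:

\begin{aligned}
r_t &= \sigma\big(W_r\,[x_t;\,h_{t-1}] + b_r\big) \\
z_t &= \sigma\big(W_z\,[x_t;\,h_{t-1}] + b_z\big) \\
\tilde{h}_t &= \tanh\big(W_h\,[x_t;\,r_t \odot h_{t-1}] + b_h\big) \\
h_t &= z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{h}_t
\end{aligned}

Each `_linear([...], ...)` call above realizes one of the $W\,[\cdot\,;\,\cdot] + b$ products.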
Example No. 24
def graph(embedding_npy, M, K):
    vocab_size = embedding_npy.shape[0]
    emb_size = embedding_npy.shape[1]
    num_centroids = 2**K
    tau = tf.placeholder_with_default(np.array(1.0, dtype='float32'),
                                      tuple()) - 0.1

    embedding = tf.constant(embedding_npy, name="embedding")
    word_input = tf.placeholder_with_default(np.array([3, 4, 5],
                                                      dtype="int32"),
                                             shape=[None],
                                             name="word_input")
    word_lookup = tf.nn.embedding_lookup(embedding,
                                         word_input,
                                         name="word_lookup")

    A = tf.get_variable("codebook", [M * num_centroids, emb_size])

    with tf.variable_scope("h"):
        h = tf.nn.tanh(_linear(word_lookup, M * num_centroids // 2, True))
    with tf.variable_scope("logits"):
        logits_lookup = _linear(h, M * num_centroids, True)
        logits_lookup = tf.log(tf.nn.softplus(logits_lookup) + 1e-8)
    logits_lookup = tf.reshape(logits_lookup, [-1, M, num_centroids],
                               name="logits_lookup")

    D = gumbel_softmax(logits_lookup, tau, hard=False)
    D_prime = tf.reshape(D, [-1, M * num_centroids])
    y = tf.matmul(D_prime, A)

    loss = 0.5 * tf.reduce_sum((y - word_lookup)**2, axis=1)
    loss = tf.reduce_mean(loss, name="loss")

    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.Variable(0.0, trainable=False, name='learning_rate')

    max_grad_norm = 0.001
    tvars = tf.trainable_variables()
    grads = tf.gradients(loss, tvars)
    grads, global_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    global_norm = tf.identity(global_norm, name="global_norm")
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars),
                                         global_step=global_step,
                                         name="train_op")

    return word_input, tau, learning_rate, train_op, loss, global_norm, D
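`gumbel_softmax` is not defined in this snippet. A commonly used straight-through Gumbel-softmax implementation (following Jang et al.) that the graph above is assumed to expect looks roughly like this:

import tensorflow as tf

def sample_gumbel(shape, eps=1e-20):
    """Sample from Gumbel(0, 1)."""
    u = tf.random_uniform(shape, minval=0, maxval=1)
    return -tf.log(-tf.log(u + eps) + eps)

def gumbel_softmax(logits, temperature, hard=False):
    """Soft (or straight-through hard) sample over the last axis of `logits`."""
    y = tf.nn.softmax((logits + sample_gumbel(tf.shape(logits))) / temperature)
    if hard:
        y_hard = tf.cast(tf.equal(y, tf.reduce_max(y, -1, keep_dims=True)), y.dtype)
        y = tf.stop_gradient(y_hard - y) + y  # hard forward pass, soft gradients
    return y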
Example No. 25
    def call(self, inputs, state):

        d, cont = state
        with tf.variable_scope(tf.get_variable_scope()):

            batch_size = self.h.get_shape().as_list()[0]
            seq_len = tf.shape(self.h)[1]
            emb_size = self.h.get_shape().as_list()[2]  #tf.shape(self.h)[2]
            flat_h = tf.reshape(self.h, [-1, emb_size])  #flatten h
            bs_times_seqlen = flat_h.get_shape().as_list()[
                0]  #tf.shape(flat_h)[0]
            tile_state = tf.tile(d, [seq_len, 1])
            '''The two linear layers must be kept in separate variable scopes because each _linear call creates its own "kernel" variable.'''
            with tf.variable_scope("val"):
                val = self._activation(
                    _linear([tile_state, flat_h], self.state_size[0], True))
            with tf.variable_scope("s"):
                s = _linear([val],
                            1,
                            True,
                            bias_initializer=tf.constant_initializer(0))
                s = tf.reshape(s, [batch_size, -1])  #[batch_size, seq_len]
                a = tf.nn.softmax(s, 1)  #[batch_size, seq_length]
                a = tf.reshape(a, [-1])
                flat_h = tf.transpose(flat_h)
                cont = flat_h * a
                cont = tf.transpose(cont)
                cont = tf.reshape(cont, [batch_size, -1, emb_size])
                cont = tf.reduce_sum(cont, 1)  #[batch_size, emb_size]
                new_inputs = tf.concat([inputs, cont], 1)
        '''u: z_t'''
        with tf.variable_scope("gates"):  # Reset gate and update gate.
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                dtype = [a.dtype for a in [new_inputs, d]][0]
                bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
            value = math_ops.sigmoid(
                _linear([new_inputs, d], 2 * self._num_units, True, bias_ones,
                        self._kernel_initializer))
            r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
        with tf.variable_scope("candidate"):
            c = self._activation(
                _linear([new_inputs, r * d], self._num_units, True,
                        self._bias_initializer, self._kernel_initializer))
        new_h = u * d + (1 - u) * c

        return tf.concat([new_h, cont], 1), tuple([new_h, cont])
Example No. 26
    def call(self, inputs, state):
        if Config.ATTN_TYPE == Config.attn_multimodal:
            # Attend using prev lstm state
            lstm_state, prev_attn_softmax = state
            input_i = inputs[:,0,:]
            input_a = inputs[:,1,:]
            attn_dim = Config.NCELLS
            inputs_attended, cur_attn_softmax = self._attention_multimodal(input_i, input_a, lstm_state.h, attn_dim, self.FUSION_CONC)

            # LSTM
            lstm_output, lstm_state = self._cell(inputs_attended, lstm_state)
            lstm_output = (lstm_output, cur_attn_softmax)

            # Postprocess
            new_state = RNNInputStateHandler.get_state_tuple({'lstm_state' : lstm_state, 'attn_state' : cur_attn_softmax}, is_global_state=False)

        elif Config.ATTN_TYPE == Config.attn_temporal:
            # Attend using prev attn_state
            state, attn_state, attn_state_hist = state
            input_size = inputs.get_shape().as_list()[1]# [0] is batch size, [1] is feature size 
            inputs_attended = rnn_cell_impl._linear(args=[inputs, attn_state], output_size=input_size, bias=True)

            # LSTM
            lstm_output, lstm_state = self._cell(inputs_attended, state)

            # Attention for next timestep
            new_state_cat = tf.concat(nest.flatten(lstm_state), 1) # NOTE this is [c,h] being used for _attention_temporal (not just h)
            attn_state_hist = tf.reshape(attn_state_hist, [-1, Config.ATTN_TEMPORAL_WINDOW, Config.ATTN_STATE_NCELLS])
            new_attn_state, new_attn_state_hist = self._attention_temporal(new_state_cat, attn_state_hist)

            # Projection layer
            if self._project_output: 
                with tf.variable_scope("attn_output_projection"):
                    output = rnn_cell_impl._linear(args=[lstm_output, new_attn_state], output_size=Config.ATTN_STATE_NCELLS, bias=True)
            else:
                output = new_attn_state

            # Postprocess
            new_attn_state_hist = tf.concat( [new_attn_state_hist, tf.expand_dims(output, 1)], 1) # Concats latest output to new_attn_state_hist
            new_attn_state_hist = tf.reshape( new_attn_state_hist, [-1, Config.ATTN_TEMPORAL_WINDOW * Config.ATTN_STATE_NCELLS])
            new_state = RNNInputStateHandler.get_state_tuple({'lstm_state' : lstm_state, 'attn_state' : new_attn_state, 'attn_state_hist' : new_attn_state_hist}, is_global_state=False)

        else:
            raise ValueError('Invalid Config.ATTN_TYPE selected. Check Config.py!')

        return lstm_output, new_state
Example No. 27
 def __call__(self, inputs, state):
   """Gated recurrent unit (GRU) with nunits cells."""
   with vs.variable_scope("gates"):  # Reset gate and update gate.
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
       dtype = [a.dtype for a in [inputs, state]][0]
       bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     value = rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True, bias_ones,\
                 self._kernel_initializer)
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
      r, u = layer_normalization(r, scope="r/"), layer_normalization(u, scope="u/")
      r, u = math_ops.sigmoid(r), math_ops.sigmoid(u)
   with vs.variable_scope("candidate"):
     c = self._activation(rnn_cell_impl._linear([inputs, r * state], self._num_units, True, self._bias_initializer, self._kernel_initializer))
   new_h = u * state + (1 - u) * c
   return new_h, new_h
Example No. 28
 def __call__(self, inputs, state):
   """Gated recurrent unit (GRU) with nunits cells."""
   with vs.variable_scope("gates"):  # Reset gate and update gate.
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
       dtype = [a.dtype for a in [inputs, state]][0]
       bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     value = rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True, bias_ones,\
                 self._kernel_initializer)
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
      r, u = layer_normalization(r, scope="r/"), layer_normalization(u, scope="u/")
      r, u = math_ops.sigmoid(r), math_ops.sigmoid(u)
   with vs.variable_scope("candidate"):
     c = self._activation(rnn_cell_impl._linear([inputs, r * state], self._num_units, True, self._bias_initializer, self._kernel_initializer))
   new_h = u * state + (1 - u) * c
   return new_h, new_h
Example No. 29
    def __init__(self, num_units, encoder_output, scope=None):
        self.hs = encoder_output
        with vs.variable_scope(scope or type(self).__name__):
            with vs.variable_scope("Attn1"):
                hs2d = tf.reshape(self.hs, [-1, num_units])
                phi_hs2d = tanh(rnn_cell_impl._linear(hs2d, num_units, False))

                self.phi_hs = tf.reshape(phi_hs2d, tf.shape(self.hs))
        super(GRUCellAttn, self).__init__(num_units)
Example No. 30
 def call(self, inputs, state):
   """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
   from tensorflow.python.ops.rnn_cell_impl import _linear
   output = self._activation(
       _linear([inputs, state], output_size=self._num_units,
       bias=False if self._bias_initializer is None else True,
       kernel_initializer=self._kernel_initializer,
       bias_initializer=self._bias_initializer))
   return output, output
Example No. 31
    def __init__(self, num_units, encoder_output, scope=None):
        self.hs = encoder_output
        with vs.variable_scope(scope or type(self).__name__):
            with vs.variable_scope("Attn1"):
                hs2d = tf.reshape(self.hs, [-1, num_units])
                phi_hs2d = tanh(rnn_cell_impl._linear(hs2d, num_units, False))

                self.phi_hs = tf.reshape(phi_hs2d, tf.shape(self.hs))
        super(GRUCellAttn, self).__init__(num_units)
Example No. 32
 def attention(query):
     """Point on hidden using hidden_features and query."""
     with vs.variable_scope("Attention"):
         y = rnn_cell_impl._linear(query, attention_vec_size, True)
         y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
         # Attention mask is a softmax of v^T * tanh(...).
         s = math_ops.reduce_sum(v * math_ops.tanh(hidden_features + y),
                                 [2, 3])
         return s
Example No. 33
    def call(self, inputs, state):
        """
            Conditional GRU operations

            inputs: [batch_size, num_units]
            state: (h=[batch_size, num_units], c=[batch_size, num_units])

            output: [batch_size, num_units]
            new_state: (h=[batch_size, num_units], c=[batch_size, num_units])
        """

        h = state.h
        c = state.c

        bias_ones = self._bias_initializer
        if self._bias_initializer is None:
            bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
        with vs.variable_scope('gates'):
            val_concat = rnn_cell_impl._linear(
                [inputs, h, c],
                2 * self._num_units,
                bias=False,
                bias_initializer=self._bias_initializer,
                kernel_initializer=self._kernel_initializer)

        val = math_ops.sigmoid(val_concat)
        r, z = array_ops.split(value=val, num_or_size_splits=2, axis=1)

        r_state = r * h

        with vs.variable_scope('candidate'):
            hbar_out = rnn_cell_impl._linear(
                [inputs, r_state, c],
                self._num_units,
                bias=False,
                bias_initializer=self._bias_initializer,
                kernel_initializer=self._kernel_initializer)

        hbar = self._activation(hbar_out)
        output = (1 - z) * h + z * hbar

        new_state = ConditionalGRUState(h=output, c=c)

        return output, new_state
Example No. 34
 def call(self, inputs, state):
   inputs, encoded_question = inputs
   i = state.i
   state = state.h
   with tf.variable_scope("gates"):  # Reset gate and update gate.
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
       dtype = [a.dtype for a in [inputs, state]][0]
       bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     value = math_ops.sigmoid(
         _linear([inputs, state], 2 * self._num_units, True, bias_ones,
                 self._kernel_initializer))
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
   with tf.variable_scope("candidate"):
     c = self._activation(
         _linear([inputs, r * state], self._num_units, True, 
                 self._bias_initializer, self._kernel_initializer))
   new_h = u * state + (1 - u) * c
   self._history.append(new_h)
   
   with tf.variable_scope('attention'):
     question_dim = encoded_question.shape.as_list()
     hq = tf.tile(tf.expand_dims(encoded_question, 1), [1, self._time_step, 1])
     padding = tf.constant(0.0, shape=(self._batch_size, self._time_step - len(self._history), self._num_units))
     gru_h = tf.transpose(tf.convert_to_tensor(self._history), [1,0,2])
     gru_h = tf.concat([gru_h, padding], axis=1)
     hq = tf.reshape(hq, [-1, question_dim[-1]])
     gru_h = tf.reshape(gru_h, [-1, self._num_units])
     with tf.variable_scope('inner'):
       att = tf.tanh(_linear([gru_h, hq], self._att_hidden, True,
                     self._bias_initializer, self._kernel_initializer))
     with tf.variable_scope('outer'):
       att = _linear([att], 1, False,
                     self._bias_initializer, self._kernel_initializer)
       att = tf.reshape(att, [self._batch_size, self._time_step])
       att_mask = np.zeros([self._batch_size, self._time_step], dtype=np.float32)
       att_mask[:,i:] = 10000.0
       att_mask = tf.convert_to_tensor(att_mask)
       att = tf.reshape(tf.nn.softmax(att - att_mask), [-1, 1])
     final_h = tf.reduce_sum(tf.reshape(tf.multiply(gru_h, att), [self._batch_size, self._time_step, self._num_units]), axis=1)
     self._history[-1] = final_h
     
   return final_h, SXMState(h=final_h,i=i+1)
Example No. 35
    def call(self, inputs, state):
        # inputs: [batch_size, in_size]
        # state:  [batch_size, output_size OR state_size]
        with tf.variable_scope("attention"):
            with tf.variable_scope("main_input"):
                # [batch_size, 1, layer_size]
                att_main_in = tf.expand_dims(_linear([inputs], self._num_units,
                                                     self._use_att_bias),
                                             axis=1)

            with tf.variable_scope("state_input"):
                # [batch_size, 1, layer_size]
                att_state_in = tf.expand_dims(_linear([state], self._num_units,
                                                      False),
                                              axis=1)

            with tf.variable_scope("s"):
                att_vec = tf.get_variable('att_vec', [self._num_units])
                # [batch_size, num_match_elems, layer_size]
                if self._use_state_for_att:
                    raw_in = tf.add(tf.add(att_main_in, att_state_in),
                                    self._att_match_in)
                else:
                    raw_in = tf.add(att_main_in, self._att_match_in)
                # [batch_size, num_match_elems, 1]
                s = tf.einsum('ijk,k->ij', tf.nn.tanh(raw_in), att_vec)

            # [batch_size, num_match_elems]
            a = tf.nn.softmax(s, dim=1)
            # [batch_size, match_size]
            c = tf.reduce_sum(tf.multiply(tf.expand_dims(a, axis=2),
                                          self._match_input),
                              axis=1)

        raw_rnn_inputs = tf.concat([inputs, c], axis=1)

        with tf.variable_scope("output_gate"):
            rnn_input_size = int(raw_rnn_inputs.get_shape()[1])
            rnn_input_gate = tf.sigmoid(
                _linear([raw_rnn_inputs], rnn_input_size, False))
            rnn_inputs = tf.multiply(raw_rnn_inputs, rnn_input_gate)

        new_h, new_h = self._base_cell.call(inputs=rnn_inputs, state=state)
        return new_h, new_h
Example No. 36
 def call(self, inputs, state):
   """Long short-term memory cell with attention (LSTMA)."""
   state, attns, attn_states = state
   attn_states = array_ops.reshape(attn_states,
                                   [-1, self._attn_length, self._attn_size])
   input_size = self._input_size
   if input_size is None:
     input_size = inputs.shape.as_list()[1]
   inputs = _linear([inputs, attns], input_size, True)
   lstm_output, new_state = self._cell(inputs, state)
   new_state_cat = array_ops.concat(nest.flatten(new_state), 1)
   new_attns, new_attn_states = self._attention(new_state_cat, attn_states)
   with tf.variable_scope("attn_output_projection"):
     output = _linear([lstm_output, new_attns], self._attn_size, True)
   new_attn_states = array_ops.concat(
       [new_attn_states, array_ops.expand_dims(output, 1)], 1)
   new_attn_states = array_ops.reshape(
       new_attn_states, [-1, self._attn_length * self._attn_size])
   new_state = (new_state, new_attns, new_attn_states)
   return output, new_state
Example No. 37
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False, keep_prob=None, is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("args must be specified")
    if not nest.is_sequence(args):
        args = [args]
    flat_args = [flatten(arg, 1) for arg in args]
    if keep_prob is not None and is_train is not None:
        flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, keep_prob), lambda: arg) for arg in flat_args]
    with tf.variable_scope(scope or 'linear'):
        flat_out = _linear(flat_args, output_size, bias, bias_initializer=tf.constant_initializer(bias_start))
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list())-1])
    return out
Example No. 38
    def setup_loss(self):
        with vs.variable_scope("Logistic"):
            doshape = tf.shape(self.decoder_output)
            T, batch_size = doshape[0], doshape[1]
            do2d = tf.reshape(self.decoder_output, [-1, self.size])
            logits2d = rnn_cell_impl._linear(do2d, self.vocab_size, False)
            outputs2d = tf.nn.log_softmax(logits2d)
            self.outputs = tf.reshape(outputs2d, tf.stack([T, batch_size, self.vocab_size]))

            targets_no_GO = tf.slice(self.target_tokens, [1, 0], [-1, -1])
            masks_no_GO = tf.slice(self.target_mask, [1, 0], [-1, -1])
            # easier to pad target/mask than to split decoder input since tensorflow does not support negative indexing
            labels1d = tf.reshape(tf.pad(targets_no_GO, [[0, 1], [0, 0]]), [-1])
            mask1d = tf.reshape(tf.pad(masks_no_GO, [[0, 1], [0, 0]]), [-1])
            losses1d = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits2d, labels=labels1d) * tf.to_float(
                mask1d)
            losses2d = tf.reshape(losses1d, tf.stack([T, batch_size]))
            self.losses = tf.reduce_sum(losses2d) / tf.to_float(batch_size)
Example No. 39
    def downscale(self, inp, mask):
        # return inp, mask

        with vs.variable_scope("Downscale"):
            inshape = tf.shape(inp)
            T, batch_size, dim = inshape[0], inshape[1], inshape[2]
            inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
            out2d = rnn_cell_impl._linear(inp2d, self.size, False)
            out3d = tf.reshape(out2d, tf.stack((batch_size, tf.to_int32(T / 2), dim)))
            out3d = tf.transpose(out3d, perm=[1, 0, 2])
            out3d.set_shape([None, None, self.size])
            out = tanh(out3d)

            mask = tf.transpose(mask)
            mask = tf.reshape(mask, [-1, 2])
            mask = tf.cast(mask, tf.bool)
            mask = tf.reduce_any(mask, reduction_indices=1)
            mask = tf.to_int32(mask)
            mask = tf.reshape(mask, tf.stack([batch_size, -1]))
            mask = tf.transpose(mask)
        return out, mask
Example No. 40
  def _attention(self, query, attn_states):
    conv2d = nn_ops.conv2d
    reduce_sum = math_ops.reduce_sum
    softmax = nn_ops.softmax
    tanh = math_ops.tanh

    with tf.variable_scope("attention"):
      k = tf.get_variable(
          "attn_w", [1, 1, self._attn_size, self._attn_vec_size])
      v = tf.get_variable("attn_v", [self._attn_vec_size])
      hidden = array_ops.reshape(attn_states,
                                 [-1, self._attn_length, 1, self._attn_size])
      hidden_features = conv2d(hidden, k, [1, 1, 1, 1], "SAME")
      y = _linear(query, self._attn_vec_size, True)
      y = array_ops.reshape(y, [-1, 1, 1, self._attn_vec_size])
      s = reduce_sum(v * tanh(hidden_features + y), [2, 3])
      a = softmax(s)
      d = reduce_sum(
          array_ops.reshape(a, [-1, self._attn_length, 1, 1]) * hidden, [1, 2])
      new_attns = array_ops.reshape(d, [-1, self._attn_size])
      new_attn_states = array_ops.slice(attn_states, [0, 1, 0], [-1, -1, -1])
      return new_attns, new_attn_states
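The `_attention` helper above is standard additive (Bahdanau-style) attention over the `_attn_length` stored states: with $q$ the query, $h_i$ the stored states, $W_1$ applied through the 1x1 convolution and $W_2$ through `_linear`,

\begin{aligned}
s_i &= v^{\top}\tanh\big(W_1 h_i + W_2 q\big) \\
a &= \operatorname{softmax}(s) \\
d &= \textstyle\sum_i a_i\, h_i
\end{aligned}

$d$ is returned as the new attention vector, and the final slice drops the oldest stored state.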
Example No. 41
        def beam_step(time, beam_probs, beam_seqs, cand_probs, cand_seqs, *states):
            batch_size = tf.shape(beam_probs)[0]
            inputs = tf.reshape(tf.slice(beam_seqs, [0, time], [batch_size, 1]), [batch_size])
            decoder_input = embedding_ops.embedding_lookup(self.L_dec, inputs)
            decoder_output, state_output = self.decoder_graph(decoder_input, states)

            with vs.variable_scope("Logistic", reuse=True):
                do2d = tf.reshape(decoder_output, [-1, self.size])
                logits2d = rnn_cell_impl._linear(do2d, self.vocab_size, False)
                logprobs2d = tf.nn.log_softmax(logits2d)

            total_probs = logprobs2d + tf.reshape(beam_probs, [-1, 1])
            total_probs_noEOS = tf.concat([tf.slice(total_probs, [0, 0], [batch_size, EOS_ID]),
                                           tf.tile([[-3e38]], [batch_size, 1]),
                                           tf.slice(total_probs, [0, EOS_ID + 1],
                                                    [batch_size, self.vocab_size - EOS_ID - 1])], 1)

            flat_total_probs = tf.reshape(total_probs_noEOS, [-1])
            beam_k = tf.minimum(tf.size(flat_total_probs), self.beam_size)
            next_beam_probs, top_indices = tf.nn.top_k(flat_total_probs, k=beam_k)

            next_bases = tf.floordiv(top_indices, self.vocab_size)
            next_mods = tf.mod(top_indices, self.vocab_size)

            next_states = [tf.gather(state, next_bases) for state in state_output]
            next_beam_seqs = tf.concat([tf.gather(beam_seqs, next_bases),
                                        tf.reshape(next_mods, [-1, 1])], 1)

            cand_seqs_pad = tf.pad(cand_seqs, [[0, 0], [0, 1]])
            beam_seqs_EOS = tf.pad(beam_seqs, [[0, 0], [0, 1]])
            new_cand_seqs = tf.concat([cand_seqs_pad, beam_seqs_EOS], 0)
            EOS_probs = tf.slice(total_probs, [0, EOS_ID], [batch_size, 1])
            new_cand_probs = tf.concat([cand_probs, tf.reshape(EOS_probs, [-1])], 0)

            cand_k = tf.minimum(tf.size(new_cand_probs), self.beam_size)
            next_cand_probs, next_cand_indices = tf.nn.top_k(new_cand_probs, k=cand_k)
            next_cand_seqs = tf.gather(new_cand_seqs, next_cand_indices)

            return [time + 1, next_beam_probs, next_beam_seqs, next_cand_probs, next_cand_seqs] + next_states
Example No. 42
def createModel(input_data, input_size, sequence_length, slot_size, intent_size, layer_size = 128, isTraining = True):
    cell_fw = tf.contrib.rnn.BasicLSTMCell(layer_size)
    cell_bw = tf.contrib.rnn.BasicLSTMCell(layer_size)

    if isTraining == True:
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=0.5,
                                             output_keep_prob=0.5)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=0.5,
                                             output_keep_prob=0.5)

    embedding = tf.get_variable('embedding', [input_size, layer_size])
    inputs = tf.nn.embedding_lookup(embedding, input_data)

    state_outputs, final_state = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=sequence_length, dtype=tf.float32)
    
    final_state = tf.concat([final_state[0][0], final_state[0][1], final_state[1][0], final_state[1][1]], 1)
    state_outputs = tf.concat([state_outputs[0], state_outputs[1]], 2)
    state_shape = state_outputs.get_shape()

    with tf.variable_scope('attention'):
        slot_inputs = state_outputs
        if remove_slot_attn == False:
            with tf.variable_scope('slot_attn'):
                attn_size = state_shape[2].value
                origin_shape = tf.shape(state_outputs)
                hidden = tf.expand_dims(state_outputs, 1)
                hidden_conv = tf.expand_dims(state_outputs, 2)
                # hidden shape = [batch, sentence length, 1, hidden size]
                k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
                hidden_features = tf.nn.conv2d(hidden_conv, k, [1, 1, 1, 1], "SAME")
                hidden_features = tf.reshape(hidden_features, origin_shape)
                hidden_features = tf.expand_dims(hidden_features, 1)
                v = tf.get_variable("AttnV", [attn_size])

                slot_inputs_shape = tf.shape(slot_inputs)
                slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])
                y = rnn_cell_impl._linear(slot_inputs, attn_size, True)
                y = tf.reshape(y, slot_inputs_shape)
                y = tf.expand_dims(y, 2)
                s = tf.reduce_sum(v * tf.tanh(hidden_features + y), [3])
                a = tf.nn.softmax(s)
                # a shape = [batch, input size, sentence length, 1]
                a = tf.expand_dims(a, -1)
                slot_d = tf.reduce_sum(a * hidden, [2])
        else:
            attn_size = state_shape[2].value
            slot_inputs = tf.reshape(slot_inputs, [-1, attn_size])

        intent_input = final_state
        with tf.variable_scope('intent_attn'):
            attn_size = state_shape[2].value
            hidden = tf.expand_dims(state_outputs, 2)
            k = tf.get_variable("AttnW", [1, 1, attn_size, attn_size])
            hidden_features = tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
            v = tf.get_variable("AttnV", [attn_size])

            y = rnn_cell_impl._linear(intent_input, attn_size, True)
            y = tf.reshape(y, [-1, 1, 1, attn_size])
            s = tf.reduce_sum(v*tf.tanh(hidden_features + y), [2,3])
            a = tf.nn.softmax(s)
            a = tf.expand_dims(a, -1)
            a = tf.expand_dims(a, -1)
            d = tf.reduce_sum(a * hidden, [1, 2])

            if add_final_state_to_intent == True:
                intent_output = tf.concat([d, intent_input], 1)
            else:
                intent_output = d

        with tf.variable_scope('slot_gated'):
            intent_gate = rnn_cell_impl._linear(intent_output, attn_size, True)
            intent_gate = tf.reshape(intent_gate, [-1, 1, intent_gate.get_shape()[1].value])
            v1 = tf.get_variable("gateV", [attn_size])
            if remove_slot_attn == False:
                slot_gate = v1 * tf.tanh(slot_d + intent_gate)
            else:
                slot_gate = v1 * tf.tanh(state_outputs + intent_gate)
            slot_gate = tf.reduce_sum(slot_gate, [2])
            slot_gate = tf.expand_dims(slot_gate, -1)
            if remove_slot_attn == False:
                slot_gate = slot_d * slot_gate
            else:
                slot_gate = state_outputs * slot_gate
            slot_gate = tf.reshape(slot_gate, [-1, attn_size])
            slot_output = tf.concat([slot_gate, slot_inputs], 1)

    with tf.variable_scope('intent_proj'):
        intent = rnn_cell_impl._linear(intent_output, intent_size, True)

    with tf.variable_scope('slot_proj'):
        slot = rnn_cell_impl._linear(slot_output, slot_size, True)

    outputs = [slot, intent]
    return outputs
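A hypothetical usage sketch for `createModel`. Note that `remove_slot_attn` and `add_final_state_to_intent` are module-level flags the function references but this snippet does not define; the values, placeholder shapes and sizes below are assumptions for illustration only.

import tensorflow as tf

# Assumed globals referenced inside createModel.
remove_slot_attn = False
add_final_state_to_intent = True

input_data = tf.placeholder(tf.int32, [None, 50], name="words")
sequence_length = tf.placeholder(tf.int32, [None], name="lengths")
slot_logits, intent_logits = createModel(
    input_data, input_size=10000, sequence_length=sequence_length,
    slot_size=122, intent_size=22, layer_size=128, isTraining=True)
# slot_logits: [batch * max_len, slot_size]; intent_logits: [batch, intent_size]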