Example #1
    def call(self, inputs, state, att_score=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        c = self._activation(self._candidate_linear([inputs, r_state]))
        # AUGRU: the attention score rescales the update gate before the
        # usual GRU interpolation between the old state and the candidate.
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h
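
A note on this variant: att_score rescales the learned update gate u before the standard GRU interpolation (the AUGRU formulation used in DIEN-style interest-evolution models). A minimal single-step usage sketch, assuming a hypothetical wrapper class VecAttGRUCell that exposes the call above (the class name and constructor are assumptions, not shown in this listing):

    import tensorflow as tf  # TF 1.x graph mode, matching the contrib-era API

    cell = VecAttGRUCell(num_units=64)               # hypothetical class

    x_t = tf.placeholder(tf.float32, [None, 32])     # one step of features
    h_prev = tf.placeholder(tf.float32, [None, 64])  # previous hidden state
    a_t = tf.placeholder(tf.float32, [None, 1])      # attention score in [0, 1]

    # One recurrence step: a_t rescales the update gate inside the cell.
    h_t, _ = cell.call(x_t, h_prev, att_score=a_t)
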
Example #2
    def call(self, inputs, state, att_score=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0,
                                                          dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        c = self._activation(self._candidate_linear([inputs, r_state]))
        # AGRU: the attention score replaces the update gate u (which is
        # computed by the split above but never used) as the mixing weight.
        new_h = (1. - att_score) * state + att_score * c
        return new_h, new_h
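
The design difference from Example #1 is only in the final step: there the attention score rescales the learned update gate, while here it interpolates directly between the previous state and the candidate. Side by side, from the two listings above:

    # Example #1 (AUGRU): attention rescales the learned update gate.
    u = (1.0 - att_score) * u
    new_h = u * state + (1 - u) * c

    # Example #2 (AGRU): attention replaces the update gate entirely.
    new_h = (1. - att_score) * state + att_score * c
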
Example #3
    def call(self, inputs, state, att_score=None):
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(1.0,
                                                          dtype=inputs.dtype)
            with vs.variable_scope("gates"):
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value, num_or_size_splits=2, axis=1)
        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)

        c = self._activation(self._candidate_linear([inputs, r_state]))
        # Same AUGRU update as Example #1: attention rescales the update gate.
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h
Example #4
    def call(self, inputs, state):
        def replace_w(x):
            if x.op.name.endswith('kernel'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            sigmoid = tf.sigmoid
            # Parameters of gates are concatenated into one multiply for
            # efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

            if self._linear is None:
                # self._linear = rnn_cell_impl._Linear(
                self._linear = core_rnn_cell._Linear(
                    [inputs, h], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(
                value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)

            new_c = (c * sigmoid(f + self._forget_bias) +
                     sigmoid(i) * self._activation(j))
            new_h = bit_utils.round_bit(
                self._activation(new_c) * sigmoid(o), bit=self._f_bit)

            if self._state_is_tuple:
                new_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
            else:
                new_state = tf.concat([new_c, new_h], 1)
        return new_h, new_state
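
bit_utils here is a project-local module, not part of TensorFlow. A plausible sketch of what round_bit might do, assuming uniform fixed-point rounding with a straight-through gradient (this is an assumption; the real bit_utils may differ, e.g. in how it handles signed ranges):

    import tensorflow as tf

    def round_bit(x, bit):
        """Round x to 2**bit uniform levels (shown for x in [0, 1]; a signed
        variant would rescale first), keeping an identity gradient. Sketch."""
        scale = 2.0 ** bit - 1.0
        rounded = tf.round(x * scale) / scale
        # Forward pass uses `rounded`; backward pass sees the identity.
        return x + tf.stop_gradient(rounded - x)

quantize_w would play the analogous role for the tanh-squashed kernels intercepted by replace_w.
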
Example #5
    def call(self, inputs, state):
        def replace_w(x):
            if x.op.name.endswith('kernel'):
                return bit_utils.quantize_w(tf.tanh(x), bit=self._w_bit)
            else:
                return x

        with bit_utils.replace_variable(replace_w):
            if self._gate_linear is None:
                bias_ones = self._bias_initializer
                if self._bias_initializer is None:
                    bias_ones = tf.constant_initializer(
                        1.0, dtype=inputs.dtype)
                with tf.variable_scope("gates"):  # Reset gate and update gate.
                    # self._gate_linear = rnn_cell_impl._Linear(
                    self._gate_linear = core_rnn_cell._Linear(
                        [inputs, state],
                        2 * self._num_units,
                        True,
                        bias_initializer=bias_ones,
                        kernel_initializer=self._kernel_initializer)

            value = tf.sigmoid(self._gate_linear([inputs, state]))
            r, u = tf.split(value=value, num_or_size_splits=2, axis=1)

            r_state = bit_utils.round_bit(r * state, bit=self._f_bit)
            if self._candidate_linear is None:
                with tf.variable_scope("candidate"):
                    # self._candidate_linear = rnn_cell_impl._Linear(
                    self._candidate_linear = core_rnn_cell._Linear(
                        [inputs, r_state],
                        self._num_units,
                        True,
                        bias_initializer=self._bias_initializer,
                        kernel_initializer=self._kernel_initializer)
            c = self._activation(self._candidate_linear([inputs, r_state]))
            c = bit_utils.round_bit(c, bit=self._f_bit)
            new_h = bit_utils.round_bit(
                u * state + (1 - u) * c, bit=self._f_bit)
        return new_h, new_h
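
Because these quantized cells keep the standard (inputs, state) call signature, they can be driven by the stock TF 1.x RNN helpers. A usage sketch, assuming a hypothetical QuantizedGRUCell wrapper around the call above (the class name and constructor arguments are assumptions):

    import tensorflow as tf

    cell = QuantizedGRUCell(num_units=128, w_bit=2, f_bit=4)  # hypothetical

    x = tf.placeholder(tf.float32, [None, 20, 64])  # [batch, time, features]
    outputs, final_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
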
Example #6
    def __call__(self, inputs, state, scope=None):

        num_proj = self._num_units if self._num_proj is None else self._num_proj
        sigmoid = math_ops.sigmoid

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]

        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        if self._linear1 is None:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                if self._num_unit_shards is not None:
                    unit_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_unit_shards))
                self._linear1 = _Linear([inputs, m_prev], 5 * self._num_units,
                                        True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
        # g = highway transform gate (the extra fifth slice below)
        lstm_matrix = self._linear1([inputs, m_prev])

        i, j, f, o, g = array_ops.split(value=lstm_matrix,
                                        num_or_size_splits=5,
                                        axis=1)

        with tf.variable_scope("highway"):
            # Linear transform of the raw inputs for the highway path. Note
            # that num_or_size_splits=1 leaves k as a one-element list, which
            # is why it is broadcast and squeezed when mixed in below.
            k = array_ops.split(value=_linear([inputs], self._num_units, True),
                                num_or_size_splits=1,
                                axis=1)

        # Diagonal connections
        if self._use_peepholes and not self._w_f_diag:
            scope = vs.get_variable_scope()
            with vs.variable_scope(
                    scope, initializer=self._initializer) as unit_scope:
                with vs.variable_scope(unit_scope):
                    self._w_f_diag = vs.get_variable("w_f_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_i_diag = vs.get_variable("w_i_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)
                    self._w_o_diag = vs.get_variable("w_o_diag",
                                                     shape=[self._num_units],
                                                     dtype=dtype)

        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
                 c_prev +
                 sigmoid(i + self._w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))

        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            sigmoidg = sigmoid(g)
            su = 1. - sigmoidg
            # Highway mix: sigmoid(g) weights the gated LSTM output against
            # the input transform k (broadcast against the one-element list,
            # then squeezed back to rank 2).
            m = sigmoidg * sigmoid(o + self._w_o_diag * c) * self._activation(
                c) + tf.squeeze(su * k, axis=0)
        else:
            sigmoidg = sigmoid(g)
            su = 1. - sigmoidg
            m = sigmoidg * sigmoid(o) * self._activation(c) + tf.squeeze(
                su * k, axis=0)

        if self._num_proj is not None:
            if self._linear2 is None:
                scope = vs.get_variable_scope()
                with vs.variable_scope(scope, initializer=self._initializer):
                    with vs.variable_scope("projection") as proj_scope:
                        if self._num_proj_shards is not None:
                            proj_scope.set_partitioner(
                                partitioned_variables.fixed_size_partitioner(
                                    self._num_proj_shards))
                        self._linear2 = _Linear(m, self._num_proj, False)
            m = self._linear2(m)

            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat([c, m], 1))

        return m, new_state
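
The fifth slice g acts as a highway-style transform gate: it interpolates between the gated LSTM output and the linear transform k of the raw inputs. A toy demonstration of just that mixing rule on dummy tensors (shapes and tanh activation assumed):

    import tensorflow as tf

    batch, units = 4, 8
    o = tf.random_normal([batch, units])  # output gate pre-activation
    g = tf.random_normal([batch, units])  # transform gate pre-activation
    c = tf.random_normal([batch, units])  # cell state
    k = tf.random_normal([batch, units])  # linear transform of the inputs

    t = tf.sigmoid(g)  # transform gate
    m = t * tf.sigmoid(o) * tf.tanh(c) + (1.0 - t) * k
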
Example #7
  def call(self, inputs, state, att_score=None):
    # The last two input columns carry time-interval features; the remaining
    # columns are scaled by the attention score before entering the LSTM.
    time_now_score = tf.expand_dims(inputs[:, -1], -1)
    time_last_score = tf.expand_dims(inputs[:, -2], -1)
    inputs = inputs[:, :-2]
    inputs = inputs * att_score
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    sigmoid = math_ops.sigmoid

    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
            
    if self._time_kernel_w1 is None:
      scope = vs.get_variable_scope()
      with vs.variable_scope(
          scope, initializer=self._initializer) as unit_scope:
        with vs.variable_scope(unit_scope):
          self._time_input_w1 = vs.get_variable(
              "_time_input_w1", shape=[self._num_units], dtype=dtype)
          self._time_input_bias1 = vs.get_variable(
              "_time_input_bias1", shape=[self._num_units], dtype=dtype)
          self._time_input_w2 = vs.get_variable(
              "_time_input_w2", shape=[self._num_units], dtype=dtype)
          self._time_input_bias2 = vs.get_variable(
              "_time_input_bias2", shape=[self._num_units], dtype=dtype)
          self._time_kernel_w1 = vs.get_variable(
              "_time_kernel_w1", shape=[input_size, self._num_units], dtype=dtype)
          self._time_kernel_t1 = vs.get_variable(
              "_time_kernel_t1", shape=[self._num_units, self._num_units], dtype=dtype)
          self._time_bias1 = vs.get_variable(
              "_time_bias1", shape=[self._num_units], dtype=dtype)
          self._time_kernel_w2 = vs.get_variable(
              "_time_kernel_w2", shape=[input_size, self._num_units], dtype=dtype)
          self._time_kernel_t2 = vs.get_variable(
              "_time_kernel_t2", shape=[self._num_units, self._num_units], dtype=dtype)
          self._time_bias2 = vs.get_variable(
              "_time_bias2", shape=[self._num_units], dtype=dtype)
          self._o_kernel_t1 = vs.get_variable(
              "_o_kernel_t1", shape=[self._num_units, self._num_units], dtype=dtype)    
          self._o_kernel_t2 = vs.get_variable(
              "_o_kernel_t2", shape=[self._num_units, self._num_units], dtype=dtype)  
                
    time_now_input = tf.nn.tanh(
        time_now_score * self._time_input_w1 + self._time_input_bias1)
    time_last_input = tf.nn.tanh(
        time_last_score * self._time_input_w2 + self._time_input_bias2)

    time_now_state = (math_ops.matmul(inputs, self._time_kernel_w1) +
                      math_ops.matmul(time_now_input, self._time_kernel_t1) +
                      self._time_bias1)
    time_last_state = (math_ops.matmul(inputs, self._time_kernel_w2) +
                       math_ops.matmul(time_last_input, self._time_kernel_t2) +
                       self._time_bias2)
    
    if self._linear1 is None:
      scope = vs.get_variable_scope()
      with vs.variable_scope(
          scope, initializer=self._initializer) as unit_scope:
        if self._num_unit_shards is not None:
          unit_scope.set_partitioner(
              partitioned_variables.fixed_size_partitioner(
                  self._num_unit_shards))
        self._linear1 = _Linear([inputs, m_prev], 4 * self._num_units, True)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    lstm_matrix = self._linear1([inputs, m_prev])
    i, j, f, o = array_ops.split(
        value=lstm_matrix, num_or_size_splits=4, axis=1)
    o = (o + math_ops.matmul(time_now_input, self._o_kernel_t1) +
         math_ops.matmul(time_last_input, self._o_kernel_t2))
    # Diagonal connections
    if self._use_peepholes and not self._w_f_diag:
      scope = vs.get_variable_scope()
      with vs.variable_scope(
          scope, initializer=self._initializer) as unit_scope:
        with vs.variable_scope(unit_scope):
          self._w_f_diag = vs.get_variable(
              "w_f_diag", shape=[self._num_units], dtype=dtype)
          self._w_i_diag = vs.get_variable(
              "w_i_diag", shape=[self._num_units], dtype=dtype)
          self._w_o_diag = vs.get_variable(
              "w_o_diag", shape=[self._num_units], dtype=dtype)

    # Time gates: sigmoid(time_last_state) scales how much old cell state is
    # kept, sigmoid(time_now_state) how much new input is written.
    if self._use_peepholes:
      c = (sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) *
           sigmoid(time_last_state) * c_prev +
           sigmoid(i + self._w_i_diag * c_prev) *
           sigmoid(time_now_state) * self._activation(j))
    else:
      c = (sigmoid(f + self._forget_bias) * sigmoid(time_last_state) * c_prev +
           sigmoid(i) * sigmoid(time_now_state) * self._activation(j))

    if self._cell_clip is not None:
      # pylint: disable=invalid-unary-operand-type
      c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
      # pylint: enable=invalid-unary-operand-type
    if self._use_peepholes:
      m = sigmoid(o + self._w_o_diag * c) * self._activation(c)
    else:
      m = sigmoid(o) * self._activation(c)

    if self._num_proj is not None:
      if self._linear2 is None:
        scope = vs.get_variable_scope()
        with vs.variable_scope(scope, initializer=self._initializer):
          with vs.variable_scope("projection") as proj_scope:
            if self._num_proj_shards is not None:
              proj_scope.set_partitioner(
                  partitioned_variables.fixed_size_partitioner(
                      self._num_proj_shards))
            self._linear2 = _Linear(m, self._num_proj, False)
      m = self._linear2(m)

      if self._proj_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
        # pylint: enable=invalid-unary-operand-type

    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat([c, m], 1))
    return m, new_state
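
This cell reads its time features from the last two input columns (inputs[:, -2] and inputs[:, -1]) and scales the remaining columns by att_score, so callers must pack the step input accordingly. A packing sketch (placeholder names and sizes are illustrative, not from the listing):

    import tensorflow as tf

    item_emb = tf.placeholder(tf.float32, [None, 36])   # per-step features
    time_last = tf.placeholder(tf.float32, [None, 1])   # one time score
    time_now = tf.placeholder(tf.float32, [None, 1])    # the other time score

    step_input = tf.concat([item_emb, time_last, time_now], axis=1)
    # Inside call: inputs[:, -1] -> time_now, inputs[:, -2] -> time_last.
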