def GetParams(self):
   """Tests for scale & elementwise layers in TF-TRT."""
   input_name = "input"
   input_dims = [10, 24, 24, 20]
   output_name = "output"
   g = ops.Graph()
   with g.as_default():
     x = array_ops.placeholder(
         dtype=dtypes.float32, shape=input_dims, name=input_name)
     for weights_shape in [
         (1,),  # scale
         (24, 1, 1),  # scale
         (24, 24, 20),  # scale
         (20,),  # elementwise
         (1, 24, 1, 1),  # elementwise
         (1, 24, 24, 1),  # elementwise
         (1, 24, 24, 20),  # elementwise
         (24, 20),  # elementwise
     ]:
       a = self._ConstOp(weights_shape)
       f = x + a
       x = math_ops.sigmoid(f)
       a = self._ConstOp(weights_shape)
       f = a + x
       x = math_ops.sigmoid(f)
     gen_array_ops.reshape(x, [5, -1], name=output_name)
   return trt_test.TfTrtIntegrationTestParams(
       gdef=g.as_graph_def(),
       input_names=[input_name],
       input_dims=[input_dims],
       output_names=[output_name],
       expected_output_dims=[(5, 23040)])
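A quick standalone arithmetic check (plain Python, not part of the test) of why expected_output_dims is (5, 23040): the input holds 10 * 24 * 24 * 20 elements and the graph ends with a reshape to [5, -1].

# Sanity check: reshape [10, 24, 24, 20] -> [5, -1]
input_dims = [10, 24, 24, 20]
total = 1
for d in input_dims:
    total *= d              # 115200 elements in the input tensor
assert total % 5 == 0
assert total // 5 == 23040  # matches expected_output_dims=[(5, 23040)]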
Example #2
  def call(self, inputs, state):
    """
    """
    (c_prev, m_prev) = state
    self._batch_size = inputs.shape[0].value or array_ops.shape(inputs)[0]
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer):
      x = array_ops.concat([inputs, m_prev], axis=1)
      with vs.variable_scope("first_gemm"):
        if self._linear1 is None:
          # no bias for bottleneck
          self._linear1 = _Linear(x, self._fact_size, False)
        R_fact = self._linear1(x)
      with vs.variable_scope("second_gemm"):
        if self._linear2 is None:
          self._linear2 = _Linear(R_fact, 4*self._num_units, True)
        R = self._linear2(R_fact)
      i, j, f, o = array_ops.split(R, 4, 1)

      c = (math_ops.sigmoid(f + self._forget_bias) * c_prev +
           math_ops.sigmoid(i) * math_ops.tanh(j))
      m = math_ops.sigmoid(o) * self._activation(c)

    if self._num_proj is not None:
      with vs.variable_scope("projection"):
        if self._linear3 is None:
          self._linear3 = _Linear(m, self._num_proj, False)
        m = self._linear3(m)

    new_state = rnn_cell_impl.LSTMStateTuple(c, m)
    return m, new_state
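For reference, the gate arithmetic computed above (and reused by the LSTM variants below) is the standard update, a sketch written with σ for the sigmoid and ⊙ for elementwise multiplication; R is the factorized projection self._linear2(self._linear1([inputs, m_prev])):

\[
\begin{aligned}
[i,\, j,\, f,\, o] &= \mathrm{split}(R,\, 4),\\
c_t &= \sigma(f + b_{\mathrm{forget}}) \odot c_{t-1} + \sigma(i) \odot \tanh(j),\\
m_t &= \sigma(o) \odot \phi(c_t),
\end{aligned}
\]

where b_forget is self._forget_bias and φ is self._activation; the optional projection then maps m_t to num_proj units.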
Example #3
  def __call__(self, inputs, state, scope=None):
    """LSTM cell with layer normalization and recurrent dropout."""

    with vs.variable_scope(scope or type(self).__name__) as scope:  # LayerNormBasicLSTMCell  # pylint: disable=unused-variable
      c, h = state
      args = array_ops.concat(1, [inputs, h])
      concat = self._linear(args)

      i, j, f, o = array_ops.split(1, 4, concat)
      if self._layer_norm:
        i = self._norm(i, "input")
        j = self._norm(j, "transform")
        f = self._norm(f, "forget")
        o = self._norm(o, "output")

      g = self._activation(j)
      if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
        g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)

      new_c = (c * math_ops.sigmoid(f + self._forget_bias)
               + math_ops.sigmoid(i) * g)
      if self._layer_norm:
        new_c = self._norm(new_c, "state")
      new_h = self._activation(new_c) * math_ops.sigmoid(o)

      new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
      return new_h, new_state
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = array_ops.split(1, 2, state)

            i = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="i")
            j = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="j")
            f = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="f")
            o = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="o")
        
#             concat = _linear([inputs, h], 4 * self._num_units, True)
#             # i = input_gate, j = new_input, f = forget_gate, o = output_gate
#             i , j, f, o = array_ops.split(1, 4, concat)

            new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                     self._activation(j))
            new_h = self._activation(new_c) * sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = array_ops.concat(1, [new_c, new_h])
            return new_h, new_state
Example #5
 def _logits_to_prediction(self, logits=None):
   predictions = {PredictionKey.LOGITS: logits}
   if self.logits_dimension == 1:
     predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
     logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
   predictions[PredictionKey.PROBABILITIES] = math_ops.sigmoid(logits)
   predictions[PredictionKey.CLASSES] = math_ops.to_int64(
       math_ops.greater(logits, 0))
   return predictions
 def LSTMCell(cls, x, mprev, cprev, weights):
   xm = array_ops.concat([x, mprev], 1)
   i_i, i_g, f_g, o_g = array_ops.split(
       value=math_ops.matmul(xm, weights), num_or_size_splits=4, axis=1)
   new_c = math_ops.sigmoid(f_g) * cprev + math_ops.sigmoid(
       i_g) * math_ops.tanh(i_i)
   new_c = clip_ops.clip_by_value(new_c, -50.0, 50.0)
   new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c)
   return new_m, new_c
Example #7
 def _logits_to_prediction(self, logits=None):
   predictions = {PedictionKey.LOGITS: logits}
   if self.logits_dimension == 1:
     predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits)
     logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
   predictions[PedictionKey.PROBABILITIES] = math_ops.sigmoid(logits)
   # Workaround for argmax dropping the second dimension.
   predictions[PedictionKey.CLASSES] = math_ops.to_int64(
       math_ops.greater(logits, 0))
   return predictions
Example #8
 def __call__(self, inputs, state, scope=None):
     """Gated recurrent unit (GRU) with nunits cells."""
     with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
         with vs.variable_scope("Gates"):  # Reset gate and update gate.
             # We start with bias of 1.0 to not reset and not update.
             r, u = array_ops.split(1, 2, linear([inputs, state], 2 * self._num_units, True, 1.0))
             r, u = sigmoid(r), sigmoid(u)
         with vs.variable_scope("Candidate"):
             c = tanh(linear([inputs, r * state], self._num_units, True))
         new_h = u * state + (1 - u) * c
     return new_h, new_h
Example #9
 def __call__(self, inputs, state, scope=None):
     """Gated recurrent unit (GRU) with nunits cells."""
     with vs.variable_scope(scope or "gru_cell"):
         with vs.variable_scope("gates"):  # Reset gate and update gate.
             # We start with bias of 1.0 to not reset and not update.
             r, u = array_ops.split(1, 2, _linear([inputs, state], 2 * self._num_units, True, 1.0, scope=scope))
             r, u = sigmoid(r), sigmoid(u)
         with vs.variable_scope("candidate"):
             c = self._activation(_linear([inputs, r * state], self._num_units, True, scope=scope))
         new_h = u * state + (1 - u) * c
     return new_h, new_h
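Both GRU snippets above compute the same update (σ is the sigmoid, ⊙ elementwise multiplication; the gate bias starts at 1.0 so the cell initially neither resets nor updates):

\[
\begin{aligned}
r &= \sigma(W_r\,[x,\, h] + b_r), \qquad u = \sigma(W_u\,[x,\, h] + b_u),\\
c &= \phi(W_c\,[x,\; r \odot h] + b_c), \qquad h' = u \odot h + (1 - u) \odot c,
\end{aligned}
\]

with φ = tanh in Example #8 and the configured self._activation in Example #9; h' is returned as both the output and the new state.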
Example #10
 def _logits_to_predictions(self, logits):
   """See `_MultiClassHead`."""
   predictions = {prediction_key.PredictionKey.LOGITS: logits}
   if self.logits_dimension == 1:
     predictions[prediction_key.PredictionKey.LOGISTIC] = math_ops.sigmoid(
         logits)
     logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
   predictions[prediction_key.PredictionKey.PROBABILITIES] = math_ops.sigmoid(
       logits)
   predictions[prediction_key.PredictionKey.CLASSES] = math_ops.to_int64(
       math_ops.greater(logits, 0))
   return predictions
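A small worked example of the binary branch (logits_dimension == 1), using NumPy only for illustration; the names below are illustrative and not part of the head API:

import numpy as np

logits = np.array([[2.0], [-1.0]])
logistic = 1.0 / (1.0 + np.exp(-logits))                   # sigmoid -> [[0.881], [0.269]]
two_class_logits = np.concatenate(
    [np.zeros_like(logits), logits], axis=1)               # [[0., 2.], [0., -1.]]
probabilities = 1.0 / (1.0 + np.exp(-two_class_logits))    # per-class sigmoid
classes = (two_class_logits > 0).astype(np.int64)          # logit > 0  <=>  probability > 0.5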
Example #11
  def __call__(self, inputs, state, scope=None):
    """Recurrent Highway Network cell (RHN)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicRHNCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        y = state
      else:
        y = array_ops.split(1, 1, state)
      assert self._recurrence_depth > 0 and type(self._recurrence_depth) is int
      # h_transform = [None] * self._recurrence_depth
      # t = [None] * self._recurrence_depth
      # s = [None] * self._recurrence_depth
      # concat = [None] * self._recurrence_depth
      # for i in range(self._recurrence_depth):
      #   if i == 0:
      #     concat[i] = _linear([inputs, h], 2 * self._num_units, True)
      #     # h = nonlinear transform, t = transfer gate
      #     h_transform[i], t[i] = array_ops.split(1, 2, concat[i])
      #     t[i] = sigmoid(t[i] + self._transfer_bias)
      #     s[i] = self._activation(h_transform[i]) * t[i] + \
      #         (1.0 - t[i]) * _linear([inputs], 1 * self._num_units, False)
      #   if i > 0:
      #     concat[i] = _linear([h], 2 * self._num_units, True)
      #     # h = nonlinear transform, t = transfer gate
      #     h_transform[i], t[i] = array_ops.split(1, 2, concat[i])
      #     t[i] = sigmoid(t[i] + self._transfer_bias)
      #     s[i] = self._activation(h_transform[i]) * t[i] + \
      #         (1.0 - t[i]) * s[i-1]

      # ALTERNATIVE IMPLEMENTATION:
      for i in range(self._recurrence_depth):
        if i == 0:
          concat = _linear([inputs, y], 2 * self._num_units, True)
          # h = nonlinear transform, t = transfer gate
          h, t = array_ops.split(1, 2, concat)
          t = sigmoid(t + self._transfer_bias)
          s = self._activation(h) * t + \
              (1.0 - t) * _linear([inputs], 1 * self._num_units, False)
        if i > 0:
          concat = _linear([s], 2 * self._num_units, True)
          # h = nonlinear transform, t = transfer gate
          h, t = array_ops.split(1, 2, concat)
          t = sigmoid(t + self._transfer_bias)
          s = self._activation(h) * t + \
              (1.0 - t) * s
      new_y = s

      if self._state_is_tuple:
        new_state = RHNStateTuple(new_y)
      else:
        new_state = array_ops.concat(1, [new_y])
      return new_y, new_state
Example #12
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with hypernetworks and layer normalization."""
    with vs.variable_scope(scope or type(self).__name__):
      # Parameters of gates are concatenated into one multiply for efficiency.
      total_h, total_c = tf.split(1, 2, state)
      h = total_h[:, 0:self._num_units]
      c = total_c[:, 0:self._num_units]

      self.hyper_state = tf.concat(1, [total_h[:, self._num_units:], total_c[:, self._num_units:]])
      hyper_input = tf.concat(1, [inputs, h])
      hyper_output, hyper_new_state = self.hyper_cell(hyper_input, self.hyper_state)
      self.hyper_output = hyper_output
      self.hyper_state = hyper_new_state

      input_below_ = rnn_cell._linear([inputs],
                                      4 * self._num_units, False, scope="out_1")
      input_below_ = self.hyper_norm(input_below_, 4 * self._num_units, scope="hyper_x")
      state_below_ = rnn_cell._linear([h],
                                      4 * self._num_units, False, scope="out_2")
      state_below_ = self.hyper_norm(state_below_, 4 * self._num_units, scope="hyper_h")

      if self.is_layer_norm:
        s1 = vs.get_variable("s1", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
        s2 = vs.get_variable("s2", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
        s3 = vs.get_variable("s3", initializer=tf.ones([self._num_units]), dtype=tf.float32)

        b1 = vs.get_variable("b1", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
        b2 = vs.get_variable("b2", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
        b3 = vs.get_variable("b3", initializer=tf.zeros([self._num_units]), dtype=tf.float32)


        input_below_ = ln(input_below_, s1, b1)


        state_below_ = ln(state_below_, s2, b2)

      lstm_matrix = tf.add(input_below_, state_below_)
      i, j, f, o = array_ops.split(1, 4, lstm_matrix)
      new_c = (c * sigmoid(f) + sigmoid(i) *
               self._activation(j))

      # Currently normalizing c causes lot of nan's in the model, thus commenting it out for now.
      # new_c_ = ln(new_c, s3, b3)
      new_c_ = new_c
      new_h = self._activation(new_c_) * sigmoid(o)

      hyper_h, hyper_c = tf.split(1, 2, hyper_new_state)
      new_total_h = tf.concat(1, [new_h, hyper_h])
      new_total_c = tf.concat(1, [new_c, hyper_c])
      new_total_state = tf.concat(1, [new_total_h, new_total_c])
      return new_h, new_total_state
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      c, h = array_ops.split(1, 2, state)
      concat = linear([inputs, h], 4 * self._num_units, True)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      i, j, f, o = array_ops.split(1, 4, concat)

      new_c = c * sigmoid(f + self._forget_bias) + sigmoid(i) * tanh(j)
      new_h = tanh(new_c) * sigmoid(o)

      return new_h, array_ops.concat(1, [new_c, new_h])
Example #14
    def __call__(self, inputs, state, scope=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        dtype = inputs.dtype
        batch_size, feature_size = inputs.get_shape().as_list()
        if self._use_tgate:
            # Time gate
            feature_size = feature_size - 1
            tvscope = vs.get_variable_scope()
            with vs.variable_scope(tvscope, initializer=None) as unit_scope:
                with vs.variable_scope(unit_scope) as time_gate_scope:
                    w_t1 = vs.get_variable(
                        "w_t1", shape=[1, self._num_units], dtype=dtype)
                    bias_t1 = vs.get_variable(
                        "bias_t1", [self._num_units], dtype=dtype,
                        initializer=init_ops.constant_initializer(0.0, dtype=dtype))
                    w_tx1 = vs.get_variable(
                        "w_tx1", shape=[feature_size, self._num_units], dtype=dtype)
                seq = tf.slice(inputs, begin=[0, 0], size=[batch_size, feature_size])
                delta_t = tf.slice(inputs, begin=[0, 56], size=[batch_size, 1])


                t1_act = (self._activation(math_ops.matmul(delta_t, w_t1)) +
                          math_ops.matmul(seq, w_tx1) + bias_t1)
                t1 = sigmoid(t1_act)
                inputs = seq
        # for initial state
        (state, state_decay) = state
        with vs.variable_scope("gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            value = sigmoid(_linear(
                [inputs, state], 2 * self._num_units, True, 1.0))
            r, u = array_ops.split(value=value,
                                   num_or_size_splits=2,
                                   axis=1)
        with vs.variable_scope("candidate"):
            c = self._activation(_linear([inputs, r * state],
                                         self._num_units, True))
        new_h = u * state + (1 - u) * c

        if self._use_tgate:
            new_h_decay = u * t1 * state_decay + (1 - u * t1) * c
            new_state = (new_h, new_h_decay)
            new_state = (TGRUStateTuple(new_h, new_h_decay))
            new_h = tf.concat([new_h, new_h_decay], axis=1)
        else:
            new_state = (new_h, new_h)
            new_state = (TGRUStateTuple(new_h, new_h))

        return new_h, new_state
  def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    dim = self._num_units
    with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
      with vs.variable_scope("Gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not update.
        with vs.variable_scope("Layer_Parameters"):

          s1 = vs.get_variable("s1", initializer=tf.ones([2*dim]), dtype=tf.float32)
          s2 = vs.get_variable("s2", initializer=tf.ones([2*dim]), dtype=tf.float32)
          s3 = vs.get_variable("s3", initializer=tf.ones([dim]), dtype=tf.float32)
          s4 = vs.get_variable("s4", initializer=tf.ones([dim]), dtype=tf.float32)
          b1 = vs.get_variable("b1", initializer=tf.zeros([2*dim]), dtype=tf.float32)
          b2 = vs.get_variable("b2", initializer=tf.zeros([2*dim]), dtype=tf.float32)
          b3 = vs.get_variable("b3", initializer=tf.zeros([dim]), dtype=tf.float32)
          b4 = vs.get_variable("b4", initializer=tf.zeros([dim]), dtype=tf.float32)


          # Code below initialized for all cells
          # s1 = tf.Variable(tf.ones([2 * dim]), name="s1")
          # s2 = tf.Variable(tf.ones([2 * dim]), name="s2")
          # s3 = tf.Variable(tf.ones([dim]), name="s3")
          # s4 = tf.Variable(tf.ones([dim]), name="s4")
          # b1 = tf.Variable(tf.zeros([2 * dim]), name="b1")
          # b2 = tf.Variable(tf.zeros([2 * dim]), name="b2")
          # b3 = tf.Variable(tf.zeros([dim]), name="b3")
          # b4 = tf.Variable(tf.zeros([dim]), name="b4")

        input_below_ = rnn_cell._linear([inputs],
                               2 * self._num_units, False, scope="out_1")
        input_below_ = ln(input_below_, s1, b1)
        state_below_ = rnn_cell._linear([state],
                               2 * self._num_units, False, scope="out_2")
        state_below_ = ln(state_below_, s2, b2)
        out = tf.add(input_below_, state_below_)
        r, u = array_ops.split(1, 2, out)
        r, u = sigmoid(r), sigmoid(u)

      with vs.variable_scope("Candidate"):
          input_below_x = rnn_cell._linear([inputs],
                                           self._num_units, False, scope="out_3")
          input_below_x = ln(input_below_x, s3, b3)
          state_below_x = rnn_cell._linear([state],
                                           self._num_units, False, scope="out_4")
          state_below_x = ln(state_below_x, s4, b4)
          c_pre = tf.add(input_below_x, r * state_below_x)
          c = self._activation(c_pre)
      new_h = u * state + (1 - u) * c
    return new_h, new_h
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # Parameters of gates are concatenated into one multiply for efficiency.
      if self._state_is_tuple:
        c, h = state
      else:
        c, h = array_ops.split(1, 2, state)

      s1 = vs.get_variable("s1", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
      s2 = vs.get_variable("s2", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
      s3 = vs.get_variable("s3", initializer=tf.ones([self._num_units]), dtype=tf.float32)

      b1 = vs.get_variable("b1", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
      b2 = vs.get_variable("b2", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
      b3 = vs.get_variable("b3", initializer=tf.zeros([self._num_units]), dtype=tf.float32)

      # s1 = tf.Variable(tf.ones([4 * self._num_units]), name="s1")
      # s2 = tf.Variable(tf.ones([4 * self._num_units]), name="s2")
      # s3 = tf.Variable(tf.ones([self._num_units]), name="s3")
      #
      # b1 = tf.Variable(tf.zeros([4 * self._num_units]), name="b1")
      # b2 = tf.Variable(tf.zeros([4 * self._num_units]), name="b2")
      # b3 = tf.Variable(tf.zeros([self._num_units]), name="b3")

      input_below_ = rnn_cell._linear([inputs],
                                      4 * self._num_units, False, scope="out_1")
      input_below_ = ln(input_below_, s1, b1)
      state_below_ = rnn_cell._linear([h],
                                      4 * self._num_units, False, scope="out_2")
      state_below_ = ln(state_below_, s2, b2)
      lstm_matrix = tf.add(input_below_, state_below_)

      i, j, f, o = array_ops.split(1, 4, lstm_matrix)

      new_c = (c * sigmoid(f) + sigmoid(i) *
               self._activation(j))

      # Currently normalizing c causes lot of nan's in the model, thus commenting it out for now.
      # new_c_ = ln(new_c, s3, b3)
      new_c_ = new_c
      new_h = self._activation(new_c_) * sigmoid(o)

      if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
      else:
        new_state = array_ops.concat(1, [new_c, new_h])
      return new_h, new_state
  def GetParams(self):
    """Test for multi connection neighboring nodes wiring tests in TF-TRT."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [2, 3, 7, 5]
    g = ops.Graph()
    with g.as_default():
      x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
      e = constant_op.constant(
          np.random.normal(.05, .005, [3, 2, 3, 4]),
          name="weights",
          dtype=dtype)
      conv = nn.conv2d(
          input=x,
          filter=e,
          data_format="NCHW",
          strides=[1, 1, 1, 1],
          padding="VALID",
          name="conv")
      b = constant_op.constant(
          np.random.normal(2.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
      t = conv + b

      b = constant_op.constant(
          np.random.normal(5.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
      q = conv - b
      edge = math_ops.sigmoid(q)

      b = constant_op.constant(
          np.random.normal(5.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
      d = b + conv
      edge3 = math_ops.sigmoid(d)

      edge1 = gen_math_ops.tan(conv)
      t = t - edge1
      q = q + edge
      t = t + q
      t = t + d
      t = t - edge3
      array_ops.squeeze(t, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        expected_engines=["my_trt_op_0", "my_trt_op_1"],
        expected_output_dims=(2, 4, 5, 4),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
Example #18
 def _Model(x):
   w = variable_scope.get_variable(
       "w", (64, 64),
       initializer=init_ops.random_uniform_initializer(seed=312))
   b = variable_scope.get_variable(
       "b", (64,), initializer=init_ops.zeros_initializer())
   return math_ops.sigmoid(math_ops.matmul(x, w) + b)
Example #19
  def predictions(self, examples):
    """Add operations to compute predictions by the model.

    If logistic_loss is being used, predicted probabilities are returned.
    Otherwise, (raw) linear predictions (w*x) are returned.

    Args:
      examples: Examples to compute predictions on.

    Returns:
      An Operation that computes the predictions for examples.

    Raises:
      ValueError: if examples are not well defined.
    """
    self._assertSpecified(
        ['example_weights', 'sparse_features', 'dense_features'], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)

    result = self._linear_predictions(examples)
    if self._options['loss_type'] == 'logistic_loss':
      # Convert logits to probability for logistic loss predictions.
      with name_scope('sdca/logistic_prediction'):
        result = math_ops.sigmoid(result)
    return result
Example #20
 def call(self, inputs, state):
   """Gated recurrent unit (GRU) with nunits cells."""
   with vs.variable_scope("gates"):  # Reset gate and update gate.
     # We start with bias of 1.0 to not reset and not update.
     bias_ones = self._bias_initializer
     if self._bias_initializer is None:
       dtype = inputs.dtype
       bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
     # pylint: disable=protected-access
     value = math_ops.sigmoid(
         rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True,
                               bias_ones, self._kernel_initializer))
     r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
     # pylint: enable=protected-access
   with vs.variable_scope("candidate"):
     # pylint: disable=protected-access
     with vs.variable_scope("input_projection"):
       hi = rnn_cell_impl._linear(inputs, self._num_units, True,
                                  self._bias_initializer,
                                  self._kernel_initializer)
     with vs.variable_scope("hidden_projection"):
       hh = r * (rnn_cell_impl._linear(state, self._num_units, True,
                                       self._bias_initializer,
                                       self._kernel_initializer))
     # pylint: enable=protected-access
     c = self._activation(hi + hh)
   new_h = u * state + (1 - u) * c
   return new_h, new_h
Example #21
  def embed(self, func, embedding_classes, embedding_size, inputs, dtype=None, scope=None,
            keep_prob=1.0, initializer=None):
    embedder_cell = func(self._cell, embedding_classes, embedding_size, initializer=initializer)

    # Like rnn(..) in rnn.py, but we call only the Embedder, not the RNN cell
    outputs = []
    with vs.variable_scope(scope or "Embedder") as varscope:
      if varscope.caching_device is None:
        varscope.set_caching_device(lambda op: op.device)

      for time, input_ in enumerate(inputs):
        if time > 0: vs.get_variable_scope().reuse_variables()
        embedding = embedder_cell.__call__(input_, scope)
        if keep_prob < 1:
          embedding = tf.nn.dropout(embedding, keep_prob)

        # annotation = C~_t = tanh ( E(x_t) + b_c)
        b_c = tf.get_variable("annotation_b", [embedding_size])
        annotation = tanh(tf.nn.bias_add(embedding, b_c))

        # weighted annotation = i_t * C~_t
        # i = sigmoid ( E(x_t) + b_i)
        b_i = tf.get_variable("input_b", [embedding_size])
        i = sigmoid(tf.nn.bias_add(embedding, b_i))
        w_annotation = i * annotation
        outputs.append(w_annotation)

      # return empty state, will be initialized by decoder
      batch_size = array_ops.shape(inputs[0])[0]
      state = self._cell.zero_state(batch_size, dtype)
      return (outputs, state)
Example #22
    def call(self, inputs, state, att_score=None):
        """Gated recurrent unit (GRU) with nunits cells."""
        if self._gate_linear is None:
            bias_ones = self._bias_initializer
            if self._bias_initializer is None:
                bias_ones = init_ops.constant_initializer(
                    1.0, dtype=inputs.dtype)
            with vs.variable_scope("gates"):  # Reset gate and update gate.
                self._gate_linear = _Linear(
                    [inputs, state],
                    2 * self._num_units,
                    True,
                    bias_initializer=bias_ones,
                    kernel_initializer=self._kernel_initializer)

        value = math_ops.sigmoid(self._gate_linear([inputs, state]))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)

        r_state = r * state
        if self._candidate_linear is None:
            with vs.variable_scope("candidate"):
                self._candidate_linear = _Linear(
                    [inputs, r_state],
                    self._num_units,
                    True,
                    bias_initializer=self._bias_initializer,
                    kernel_initializer=self._kernel_initializer)
        c = self._activation(self._candidate_linear([inputs, r_state]))
        u = (1.0 - att_score) * u
        new_h = u * state + (1 - u) * c
        return new_h, new_h
  def _get_eval_ops(self, features, targets, metrics=None):
    """See base class."""
    logits = self._logits(features)
    result = {"loss": metrics_lib.streaming_mean(self._loss(
        logits, targets, features))}

    # Adds default metrics.
    if metrics is None:
      # TODO(b/29366811): This currently results in both an "accuracy" and an
      # "accuracy/threshold_0.500000_mean" metric for binary classification.
      metrics = {("accuracy", "classes"): metrics_lib.streaming_accuracy}

    # Adds additional useful metrics for the special case of binary
    # classification.
    # TODO(zakaria): Move LogisticRegressor.get_default_metrics to metrics
    #   and handle eval metric from targetcolumn.
    if self._target_column.num_label_columns == 1:
      predictions = math_ops.sigmoid(logits)
      targets_float = math_ops.to_float(targets)
      default_metrics = (
          logistic_regressor.LogisticRegressor.get_default_metrics())
      for metric_name, metric_op in default_metrics.items():
        result[metric_name] = metric_op(predictions, targets_float)

    if metrics:
      class_metrics = {}
      proba_metrics = {}
      for name, metric_op in six.iteritems(metrics):
        if isinstance(name, tuple):
          if len(name) != 2:
            raise ValueError("Ignoring metric {}. It returned a tuple with "
                             "len {}, expected 2.".format(name, len(name)))
          else:
            if name[1] not in ["classes", "probabilities"]:
              raise ValueError("Ignoring metric {}. The 2nd element of its "
                               "name should be either 'classes' or "
                               "'probabilities'.".format(name))
            elif name[1] == "classes":
              class_metrics[name[0]] = metric_op
            else:
              proba_metrics[name[0]] = metric_op
        elif isinstance(name, str):
          class_metrics[name] = metric_op
        else:
          raise ValueError("Ignoring metric {}. Its name is not in the correct "
                           "form.".format(name))
      if class_metrics:
        predictions = self._target_column.logits_to_predictions(logits,
                                                                proba=False)
        result.update(self._run_metrics(predictions, targets, class_metrics,
                                        self._target_column.get_weight_tensor(
                                            features)))
      if proba_metrics:
        predictions = self._target_column.logits_to_predictions(logits,
                                                                proba=True)
        result.update(self._run_metrics(predictions, targets, proba_metrics,
                                        self._target_column.get_weight_tensor(
                                            features)))

    return result
Example #24
  def create_estimator_spec(
      self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`."""
    with ops.name_scope('head'):
      logits = head_lib._check_logits(logits, self.logits_dimension)  # pylint:disable=protected-access

      # Predict.
      pred_keys = prediction_keys.PredictionKeys
      with ops.name_scope(None, 'predictions', (logits,)):
        probabilities = math_ops.sigmoid(logits, name=pred_keys.PROBABILITIES)
        predictions = {
            pred_keys.LOGITS: logits,
            pred_keys.PROBABILITIES: probabilities,
        }
      if mode == model_fn.ModeKeys.PREDICT:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.PREDICT,
            predictions=predictions,
            export_outputs={
                '': export_output.ClassificationOutput(scores=probabilities)
            })

      # Eval.
      unweighted_loss, processed_labels = self.create_loss(
          features=features, mode=mode, logits=logits, labels=labels)
      # Averages loss over classes.
      per_example_loss = math_ops.reduce_mean(
          unweighted_loss, axis=-1, keep_dims=True)
      weights = head_lib._weights(features, self._weight_column)  # pylint:disable=protected-access
      training_loss = losses.compute_weighted_loss(
          per_example_loss, weights=weights, reduction=losses.Reduction.SUM)
      if mode == model_fn.ModeKeys.EVAL:
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.EVAL,
            predictions=predictions,
            loss=training_loss,
            eval_metric_ops=self._eval_metric_ops(
                labels=processed_labels,
                probabilities=probabilities,
                weights=weights,
                per_example_loss=per_example_loss))

      # Train.
      if train_op_fn is None:
        raise ValueError('train_op_fn can not be None.')
    with ops.name_scope(''):
      summary.scalar(
          head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS),  # pylint:disable=protected-access
          training_loss)
      summary.scalar(
          head_lib._summary_key(  # pylint:disable=protected-access
              self._name, metric_keys.MetricKeys.LOSS_MEAN),
          losses.compute_weighted_loss(
              unweighted_loss, weights=weights,
              reduction=losses.Reduction.MEAN))
    return model_fn.EstimatorSpec(
        mode=model_fn.ModeKeys.TRAIN,
        predictions=predictions,
        loss=training_loss,
        train_op=train_op_fn(training_loss))
Example #25
  def __init__(self,
               logits=None,
               p=None,
               dtype=dtypes.int32,
               validate_args=True,
               allow_nan_stats=False,
               name="Bernoulli"):
    """Construct Bernoulli distributions.

    Args:
      logits: An N-D `Tensor` representing the log-odds
        of a positive event. Each entry in the `Tensor` parametrizes
        an independent Bernoulli distribution where the probability of an event
        is sigmoid(logits).
      p: An N-D `Tensor` representing the probability of a positive
          event. Each entry in the `Tensor` parameterizes an independent
          Bernoulli distribution.
      dtype: dtype for samples.
      validate_args: Whether to assert that `0 <= p <= 1`. If not validate_args,
       `log_pmf` may return nans.
      allow_nan_stats:  Boolean, default False.  If False, raise an exception if
        a statistic (e.g. mean/mode/etc...) is undefined for any batch member.
        If True, batch members with valid parameters leading to undefined
        statistics will return NaN for this statistic.
      name: A name for this distribution.

    Raises:
      ValueError: If p and logits are passed, or if neither are passed.
    """
    self._allow_nan_stats = allow_nan_stats
    self._name = name
    self._dtype = dtype
    self._validate_args = validate_args
    check_op = check_ops.assert_less_equal
    if p is None and logits is None:
      raise ValueError("Must pass p or logits.")
    elif p is not None and logits is not None:
      raise ValueError("Must pass either p or logits, not both.")
    elif p is None:
      with ops.op_scope([logits], name):
        self._logits = array_ops.identity(logits, name="logits")
      with ops.name_scope(name):
        with ops.name_scope("p"):
          self._p = math_ops.sigmoid(self._logits)
    elif logits is None:
      with ops.name_scope(name):
        with ops.name_scope("p"):
          p = array_ops.identity(p)
          one = constant_op.constant(1., p.dtype)
          zero = constant_op.constant(0., p.dtype)
          self._p = control_flow_ops.with_dependencies(
              [check_op(p, one), check_op(zero, p)] if validate_args else [], p)
        with ops.name_scope("logits"):
          self._logits = math_ops.log(self._p) - math_ops.log(1. - self._p)
    with ops.name_scope(name):
      with ops.name_scope("q"):
        self._q = 1. - self._p
    self._batch_shape = array_ops.shape(self._logits)
    self._event_shape = array_ops.constant([], dtype=dtypes.int32)
Example #26
def _kl_bernoulli_bernoulli(a, b, name=None):
  """Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.

  Args:
    a: instance of a Bernoulli distribution object.
    b: instance of a Bernoulli distribution object.
    name: (optional) Name to use for created operations.
      default is "kl_bernoulli_bernoulli".

  Returns:
    Batchwise KL(a || b)
  """
  with ops.name_scope(name, "kl_bernoulli_bernoulli", [a.logits, b.logits]):
    return (math_ops.sigmoid(a.logits) * (-nn.softplus(-a.logits) +
                                          nn.softplus(-b.logits)) +
            math_ops.sigmoid(-a.logits) * (-nn.softplus(a.logits) +
                                           nn.softplus(b.logits)))
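The returned expression is the closed-form Bernoulli KL divergence rewritten with the identities log σ(x) = -softplus(-x) and 1 - σ(x) = σ(-x); writing a and b for a.logits and b.logits, and p_a = σ(a), p_b = σ(b):

\[
\mathrm{KL}(a \,\|\, b)
  = p_a \log\frac{p_a}{p_b} + (1 - p_a)\log\frac{1 - p_a}{1 - p_b}
  = \sigma(a)\bigl(-\mathrm{softplus}(-a) + \mathrm{softplus}(-b)\bigr)
  + \sigma(-a)\bigl(-\mathrm{softplus}(a) + \mathrm{softplus}(b)\bigr),
\]

which matches the code term by term.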
Example #27
    def __call__(self, inputs, state, scope=None):
        with vs.variable_scope(scope or type(self).__name__):
            batch_size = inputs.get_shape().with_rank(2)[0]
            input_size = inputs.get_shape().with_rank(2)[1]
            if self.W is None:
                self.W = vs.get_variable("W", [input_size, self._num_units], initializer=self.input_weights_init)
            if self.U is None:
                self.U = vs.get_variable("U", [self._num_units, self._num_units], initializer=self.recc_weights_init)
            if self.bias is None:
                self.bias = vs.get_variable("Bias", [self._num_units], initializer=init_ops.constant_initializer(0.0))

            if self._sensitivity:
                if self.W_s is None: 
                    self.W_s = vs.get_variable("W_s", [input_size, self._num_units], initializer=self.input_weights_init)
                if self.U_s is None:
                    self.U_s = vs.get_variable("U_s", [self._num_units, self._num_units], initializer=self.recc_weights_init)
                if self.bias_s is None:
                    self.bias_s = vs.get_variable("Bias_s", [self._num_units], initializer=init_ops.constant_initializer(0.0))
                s = sigmoid(math_ops.matmul(inputs, self.W_s) + math_ops.matmul(state, self.U_s) + self.bias_s)
                # s *= 3.0
            else:
                s = 1.0
            s = 1.0
            state_cos = s*tf.cos(state)
            weighted_input =  math_ops.matmul(inputs, self.W) + math_ops.matmul(state, self.U) + self.bias

            new_state = s - state_cos + (s + state_cos) * weighted_input
            if not self._update_gate:
                state = state + self._dt * new_state
            else:
                if self.W_u is None: 
                    self.W_u = vs.get_variable("W_u", [input_size, self._num_units], initializer=self.input_weights_init)
                if self.U_u is None:
                    self.U_u = vs.get_variable("U_u", [self._num_units, self._num_units], initializer=self.recc_weights_init)
                if self.bias_u is None:
                    self.bias_u = vs.get_variable("Bias_u", [self._num_units], initializer=init_ops.constant_initializer(0.0))
                u = sigmoid(math_ops.matmul(inputs, self.W_u) + math_ops.matmul(state, self.U_u) + self.bias_u)
                state = u * state + (1.0-u) * self._dt * new_state
                self.update_info.append(u)
            
            # self.sigma = vs.get_variable("sigma", [self._num_units], initializer=init_ops.constant_initializer(1.0))
            output = self._activation(state, self._sigma)
            
            self.states_info.append(state)

        return output, state
 def _cdf(self, positive_counts):
   if self.validate_args:
     positive_counts = math_ops.floor(
         distribution_util.embed_check_nonnegative_discrete(
             positive_counts, check_integer=False))
   return math_ops.betainc(
       self.total_count, positive_counts + 1.,
       math_ops.sigmoid(-self.logits))
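The only sigmoid-specific step in _cdf is the identity σ(-x) = 1 - σ(x): math_ops.sigmoid(-self.logits) is simply 1 - probs, so the call evaluates the regularized incomplete beta

\[
P(X \le k) = I_{1 - p}(\texttt{total\_count},\, k + 1), \qquad p = \sigma(\texttt{logits}),
\]

at the (validated, floored) count k.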
  def __call__(self, inputs, state, scope=None):
    """Convolutional Long short-term memory cell (ConvLSTM)."""
    with vs.variable_scope(scope or self.name): # "ConvLSTMCell"
      c, h = array_ops.split(3, 2, state)

      # batch_size * height * width * channel
      concat = _conv([inputs, h], 4 * self.hidden_num, self.filter_size)

      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      i, j, f, o = array_ops.split(3, 4, concat)

      new_c = (c * sigmoid(f + self.forget_bias) + sigmoid(i) *
               self.activation(j))
      new_h = self.activation(new_c) * sigmoid(o)
      new_state = array_ops.concat(3, [new_c, new_h])

      return new_h, new_state
Example #30
 def __call__(self, inputs, state, scope=None):
     inputs = convert_to_tensor(inputs)
     with vs.variable_scope(scope or type(self).__name__):
         W_z = tf.Variable(self.W_z, name="W_z")
         W_r = tf.Variable(self.W_r, name="W_r")
         W_h = tf.Variable(self.W_h, name="W_h")
         U_z = tf.Variable(self.U_z, name="U_z")
         U_r = tf.Variable(self.U_r, name="U_r")
         U_h = tf.Variable(self.U_h, name="U_h")
         b_z = tf.Variable(self.b_z, name="b_z")
         b_r = tf.Variable(self.b_r, name="b_r")
         b_h = tf.Variable(self.b_h, name="b_h")
         z = math_ops.sigmoid(math_ops.matmul(inputs, W_z * self.W_z_mask) + math_ops.matmul(state, self.U_z_mask * U_z) + self.b_z)
         r = math_ops.sigmoid(math_ops.matmul(inputs, W_r * self.W_r_mask) + math_ops.matmul(state, self.U_r_mask * U_r) + self.b_r)
         hh = math_ops.tanh(math_ops.matmul(inputs, W_h) + math_ops.matmul(state * r, U_h) + self.b_h)
         h = (1 - z) * hh + z * state
     return h, h
Example #31
def get_logits_and_prob(logits=None,
                        p=None,
                        multidimensional=False,
                        validate_args=True,
                        name=None):
    """Converts logits to probabilities and vice-versa, and returns both.

  Args:
    logits: Numeric `Tensor` representing log-odds.
    p: Numeric `Tensor` representing probabilities.
    multidimensional: Given `p` a [N1, N2, ... k] dimensional tensor,
      whether the last dimension represents the probability between k classes.
      This will additionally assert that the values in the last dimension
      sum to one. If `False`, will instead assert that each value is in
      `[0, 1]`.
    validate_args: Whether to assert `0 <= p <= 1` if multidimensional is
      `False`, otherwise that the last dimension of `p` sums to one.
    name: A name for this operation (optional).

  Returns:
    Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`, then
    the corresponding entry in the returned logits will be `-Inf` and `Inf`
    respectively.

  Raises:
    ValueError: if neither `p` nor `logits` were passed in, or both were.
  """
    if p is None and logits is None:
        raise ValueError("Must pass p or logits.")
    elif p is not None and logits is not None:
        raise ValueError("Must pass either p or logits, not both.")
    elif p is None:
        with ops.name_scope(name, values=[logits]):
            logits = array_ops.identity(logits, name="logits")
        with ops.name_scope(name):
            with ops.name_scope("p"):
                p = math_ops.sigmoid(logits)
    elif logits is None:
        with ops.name_scope(name):
            with ops.name_scope("p"):
                p = array_ops.identity(p)
                if validate_args:
                    one = constant_op.constant(1., p.dtype)
                    dependencies = [check_ops.assert_non_negative(p)]
                    if multidimensional:
                        dependencies += [
                            assert_close(math_ops.reduce_sum(
                                p, reduction_indices=[-1]),
                                         one,
                                         message="p does not sum to 1.")
                        ]
                    else:
                        dependencies += [
                            check_ops.assert_less_equal(
                                p,
                                one,
                                message="p has components greater than 1.")
                        ]
                    p = control_flow_ops.with_dependencies(dependencies, p)
            with ops.name_scope("logits"):
                logits = math_ops.log(p) - math_ops.log(1. - p)
    return (logits, p)
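A quick round-trip check of the two branches (NumPy only, illustrative): the p branch applies the sigmoid, the logits branch inverts it with log(p) - log(1 - p).

import numpy as np

logits = np.array([0.0, 2.0, -3.0])
p = 1.0 / (1.0 + np.exp(-logits))     # sigmoid: [0.5, 0.881, 0.047]
back = np.log(p) - np.log(1.0 - p)    # logit(p) recovers the original log-odds
assert np.allclose(back, logits)
# As the docstring notes, p == 0 or p == 1 would map to -inf / +inf logits.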
Example #32
    def __call__(self, inputs, state, scope=None):
        """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: state Tensor, 2D, batch x state_size.
      scope: VariableScope for the created subgraph; defaults to "LSTMCell".

    Returns:
      A tuple containing:
      - A 2D, batch x output_dim, Tensor representing the output of the LSTM
        after reading "inputs" when previous state was "state".
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - A 2D, batch x state_size, Tensor representing the new state of LSTM
        after reading "inputs" when previous state was "state".
    Raises:
      ValueError: if an input_size was specified and the provided inputs have
        a different dimension.
    """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

        dtype = inputs.dtype
        actual_input_size = inputs.get_shape().as_list()[1]
        if self._input_size and self._input_size != actual_input_size:
            raise ValueError(
                "Actual input size not same as specified: %d vs %d." %
                (actual_input_size, self._input_size))
        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):  # "LSTMCell"
            concat_w = _get_concat_variable(
                "W", [actual_input_size + num_proj, 4 * self._num_units],
                dtype, self._num_unit_shards)

            b = vs.get_variable("B",
                                shape=[4 * self._num_units],
                                initializer=array_ops.zeros_initializer,
                                dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            cell_inputs = array_ops.concat(1, [inputs, m_prev])
            lstm_matrix = nn_ops.bias_add(
                math_ops.matmul(cell_inputs, concat_w), b)
            i, j, f, o = array_ops.split(1, 4, lstm_matrix)

            # Diagonal connections
            if self._use_peepholes:
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)

            if self._use_peepholes:
                c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) *
                     c_prev + sigmoid(i + w_i_diag * c_prev) * tanh(j))
            else:
                c = (sigmoid(f + self._forget_bias) * c_prev +
                     sigmoid(i) * tanh(j))

            if self._cell_clip is not None:
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)

            if self._use_peepholes:
                m = sigmoid(o + w_o_diag * c) * tanh(c)
            else:
                m = sigmoid(o) * tanh(c)

            if self._num_proj is not None:
                concat_w_proj = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj], dtype,
                    self._num_proj_shards)

                m = math_ops.matmul(m, concat_w_proj)

        return m, array_ops.concat(1, [c, m])
Example #33
def _test_sigmoid(data):
    """ One iteration of SIGMOID """
    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
        out = math_ops.sigmoid(in_data)
        compare_tflite_with_tvm(data, 'Placeholder:0', [in_data], [out])
 def testBernoulliWithSigmoidProbs(self):
     p = np.array([8.3, 4.2])
     dist = bernoulli.BernoulliWithSigmoidProbs(logits=p)
     with self.test_session():
         self.assertAllClose(math_ops.sigmoid(p).eval(), dist.probs.eval())
Example #35
    def call(self, inputs, state):
        """
        Run one time step of the cell. That is, given the current inputs and the state from the last time step,
        calculate the current state and cell output.

        You will notice that TensorFlow LSTMCell has a lot of other features. But we will not try them. Focus on the
        very basic LSTM functionality.

        Hint 1: If you try to figure out the tensor shapes, use print(a.get_shape()) to see the shape.

        Hint 2: In LSTM there exist both matrix multiplication and element-wise multiplication. Try not to mix them.

        :param inputs: The input at the current time step. The last dimension of it should be 1.
        :param state:  The state value of the cell from the last time step. The state size can be found from function
                       state_size(self).
        :return: A tuple containing (output, new_state). For details check TensorFlow LSTMCell class.
        """
        #############################################
        #           TODO: YOUR CODE HERE            #
        #############################################
        params = self.params

        c_prev = array_ops.slice(state, [0, 0], [-1, params[0]])
        h_prev = array_ops.slice(state, [0, params[0]], [-1, params[1]])

        W = self.W
        b = self.b

        W_fh = W['W_fh']
        W_ih = W['W_ih']
        W_ch = W['W_ch']
        W_oh = W['W_oh']
        W_fi = W['W_fi']
        W_ii = W['W_ii']
        W_ci = W['W_ci']
        W_oi = W['W_oi']
        W_h = W['W_h']
        W_fc = W['W_fc']
        W_ic = W['W_ic']
        W_oc = W['W_oc']

        b_f = b['b_f']
        b_i = b['b_i']
        b_c = b['b_c']
        b_o = b['b_o']

        f = math_ops.sigmoid(
            tf.matmul(h_prev, W_fh) + tf.multiply(inputs, W_fi) + b_f +
            tf.matmul(c_prev, W_fc))
        i = math_ops.sigmoid(
            tf.matmul(h_prev, W_ih) + tf.multiply(inputs, W_ii) + b_i +
            tf.matmul(c_prev, W_ic))
        _c = math_ops.tanh(
            tf.matmul(h_prev, W_ch) + tf.multiply(inputs, W_ci) + b_c)
        c = f * c_prev + i * _c
        o = math_ops.sigmoid(
            tf.matmul(h_prev, W_oh) + tf.multiply(inputs, W_oi) + b_o +
            tf.matmul(c, W_oc))

        h = o * math_ops.tanh(c)
        h = tf.matmul(h, W_h)

        new_state = (array_ops.concat([c, h], 1))
        output = h

        return output, new_state
Example #36
  def call(self, inputs, state):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.

    Returns:
      A tuple containing:

      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj

    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
      if self._num_unit_shards is not None:
        unit_scope.set_partitioner(
            partitioned_variables.fixed_size_partitioner(
                self._num_unit_shards))
      # i = input_gate, j = new_input, f = forget_gate, o = output_gate
      lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True)
      i, j, f, o = array_ops.split(
          value=lstm_matrix, num_or_size_splits=4, axis=1)
      # Diagonal connections
      if self._use_peepholes:
        with vs.variable_scope(unit_scope) as projection_scope:
          if self._num_unit_shards is not None:
            projection_scope.set_partitioner(None)
          w_f_diag = vs.get_variable(
              "w_f_diag", shape=[self._num_units], dtype=dtype)
          w_i_diag = vs.get_variable(
              "w_i_diag", shape=[self._num_units], dtype=dtype)
          w_o_diag = vs.get_variable(
              "w_o_diag", shape=[self._num_units], dtype=dtype)

      if self._use_peepholes:
        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
      else:
        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
             self._activation(j))

      if self._cell_clip is not None:
        # pylint: disable=invalid-unary-operand-type
        c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        # pylint: enable=invalid-unary-operand-type
      if self._use_peepholes:
        m = sigmoid(o + w_o_diag * c) * self._activation(c)
      else:
        m = sigmoid(o) * self._activation(c)

      if self._num_proj is not None:
        with vs.variable_scope("projection") as proj_scope:
          if self._num_proj_shards is not None:
            proj_scope.set_partitioner(
                partitioned_variables.fixed_size_partitioner(
                    self._num_proj_shards))
          m = _linear(m, self._num_proj, bias=False)

        if self._proj_clip is not None:
          # pylint: disable=invalid-unary-operand-type
          m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
          # pylint: enable=invalid-unary-operand-type

    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat([c, m], 1))
    return m, new_state
    def __call__(self, inputs, state, scope=None):
        """ Phased long short-term memory cell (P-LSTM)."""
        with vs.variable_scope(scope or type(self).__name__):
            # Parameters of gates are concatenated into one multiply for efficiency.
            c_prev, h_prev = state

            # (batch_size, seq_len, 2)
            # NB: here we explicitly give t as input.
            x = tf.reshape(inputs[:, 0], (-1, 1))
            t = inputs[:, 1][-1]  # Now we only accept one id. We have a batch so it's a bit more complex.

            # maybe the information should come from the outside. To be defined later.

            concat = _linear([x, h_prev], 4 * self._num_units, True)
            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(value=concat,
                                         num_or_size_splits=4,
                                         axis=1)

            dtype = inputs.dtype
            tau = vs.get_variable('tau',
                                  shape=[self._num_units],
                                  initializer=random_exp_initializer(
                                      0, self.tau_init),
                                  dtype=dtype)

            r_on = vs.get_variable('r_on',
                                   shape=[self._num_units],
                                   initializer=init_ops.constant_initializer(
                                       self.r_on_init),
                                   dtype=dtype)

            s = vs.get_variable(
                's',
                shape=[self._num_units],
                initializer=init_ops.random_uniform_initializer(
                    0., tau.initialized_value()),
                dtype=dtype)

            times = tf.tile(tf.reshape(t, [-1, 1]), [1, self._num_units])
            phase = phi(times, s, tau)
            kappa = time_gate_fast(phase, r_on, self._leak_rate,
                                   self._training_phase)

            w_o_peephole = None
            if self._use_peepholes:
                w_i_peephole = vs.get_variable('W_I_peephole',
                                               shape=[self._num_units],
                                               dtype=dtype)
                w_f_peephole = vs.get_variable('W_F_peephole',
                                               shape=[self._num_units],
                                               dtype=dtype)
                w_o_peephole = vs.get_variable('W_O_peephole',
                                               shape=[self._num_units],
                                               dtype=dtype)
                f += w_f_peephole * c_prev
                i += w_i_peephole * c_prev

            new_c_tilde = sigmoid(f) * c_prev + sigmoid(i) * self._activation(
                j)
            if self._use_peepholes:
                o += w_o_peephole * new_c_tilde

            new_h_tilde = sigmoid(o) * self._activation(new_c_tilde)
            """
            Hi all,
            Yes, Philippe, you are correct in that Equation 4 should reference c_tilde and not c.
            I can add a point to the paper to mention that, and will update Figure 1 so the line is
            correctly drawn to c_tilde instead. The intuition here is that the gates should be blind
            to the effect of the khronos gate; input, forget and output gate should all operate as if
            the cell were a normal LSTM cell, while the khronos gate allows it to either operate or
            not operate (and then linearly interpolates between these two states). If the output gate
            is influenced by the khronos gate (if the peepholes reference c instead of c_tilde), then
            the PLSTM would no longer be a gated LSTM cell, but somehow be self-dependent on the time gate's actual operation.
            I think everyone's right in that it wouldn't influence much -- but it should be updated in
            the paper. Thanks very much for pointing out the issue, Philippe!
            -Danny"""

            # Apply Khronos gate
            new_h = kappa * new_h_tilde + (1 - kappa) * h_prev
            new_c = kappa * new_c_tilde + (1 - kappa) * c_prev
            new_state = (new_c, new_h)
            return new_h, new_state
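
# phi and time_gate_fast are defined elsewhere in the file. Below is a NumPy
# sketch of what they typically compute, following the Phased LSTM
# formulation; this is an assumption, since those helpers are not shown here.
import numpy as np

def np_phi(t, s, tau):
    # Phase inside the oscillation: ((t - s) mod tau) / tau, in [0, 1).
    return np.mod(t - s, tau) / tau

def np_time_gate(phase, r_on, leak_rate):
    # Piecewise-linear gate: ramps open, ramps closed, then leaks while shut.
    rising = 2.0 * phase / r_on
    falling = 2.0 - rising
    closed = leak_rate * phase
    return np.where(phase < 0.5 * r_on, rising,
                    np.where(phase < r_on, falling, closed))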
Example #38
def get_logits_and_probs(logits=None,
                         probs=None,
                         multidimensional=False,
                         validate_args=False,
                         name="get_logits_and_probs"):
    """Converts logit to probabilities (or vice-versa), and returns both.

  Args:
    logits: Floating-point `Tensor` representing log-odds.
    probs: Floating-point `Tensor` representing probabilities.
    multidimensional: Python `bool`, default `False`.
      If `True`, `logits` and `probs` are treated as `[N1, N2, ..., k]`-shaped
      tensors whose last dimension represents the logit or probability of
      `shape[-1]` classes.
    validate_args: Python `bool`, default `False`. When `True`, either assert
      `0 <= probs <= 1` (if not `multidimensional`) or that the last dimension
      of `probs` sums to one.
    name: A name for this operation (optional).

  Returns:
    logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0` or
      `1`, then the corresponding entry in the returned logits will be `-Inf`
      or `Inf` respectively.

  Raises:
    ValueError: if neither `probs` nor `logits` were passed in, or both were.
  """
    with ops.name_scope(name, values=[probs, logits]):
        if (probs is None) == (logits is None):
            raise ValueError("Must pass probs or logits, but not both.")

        if probs is None:
            logits = ops.convert_to_tensor(logits, name="logits")
            if not logits.dtype.is_floating:
                raise TypeError("logits must having floating type.")
            # We can early return since we constructed probs and therefore know
            # they're valid.
            if multidimensional:
                if validate_args:
                    logits = embed_check_categorical_event_shape(logits)
                return logits, nn.softmax(logits, name="probs")
            return logits, math_ops.sigmoid(logits, name="probs")

        probs = ops.convert_to_tensor(probs, name="probs")
        if not probs.dtype.is_floating:
            raise TypeError("probs must having floating type.")

        if validate_args:
            with ops.name_scope("validate_probs"):
                one = constant_op.constant(1., probs.dtype)
                dependencies = [check_ops.assert_non_negative(probs)]
                if multidimensional:
                    probs = embed_check_categorical_event_shape(probs)
                    dependencies += [
                        assert_close(math_ops.reduce_sum(probs, -1),
                                     one,
                                     message="probs does not sum to 1.")
                    ]
                else:
                    dependencies += [
                        check_ops.assert_less_equal(
                            probs,
                            one,
                            message="probs has components greater than 1.")
                    ]
                probs = control_flow_ops.with_dependencies(dependencies, probs)

        with ops.name_scope("logits"):
            if multidimensional:
                # In the multidimensional case we follow the TF convention and
                # return log(probs) directly, rather than subtracting a pivot
                # class as in logits = log(probs) - log(probs[pivot]). A side
                # effect is that the unidimensional case implicitly handles the
                # second (complementary) class, while the multidimensional case
                # explicitly keeps the pivot dimension.
                return math_ops.log(probs), probs
            return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
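
# A plain-NumPy check of the binary branch used above, showing that
# probs -> logits -> probs round-trips through log(p) - log1p(-p) and sigmoid.
# This is an illustrative sketch, not part of the snippet itself.
import numpy as np

def _np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

probs = np.array([0.1, 0.5, 0.9])
logits = np.log(probs) - np.log1p(-probs)
np.testing.assert_allclose(_np_sigmoid(logits), probs, rtol=1e-6)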
Example #39
    def predictions(self, logits, keys=None):
        """Return predictions based on keys. See `base_head.Head` for details.

    Args:
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      keys: a list or tuple of prediction keys. Each key can be either the class
        variable of prediction_keys.PredictionKeys or its string value, such as:
        prediction_keys.PredictionKeys.CLASSES or 'classes'. If not specified,
        it will return the predictions for all valid keys.

    Returns:
      A dict of predictions.
    """
        pred_keys = prediction_keys.PredictionKeys
        valid_keys = [
            pred_keys.LOGITS, pred_keys.LOGISTIC, pred_keys.PROBABILITIES,
            pred_keys.CLASS_IDS, pred_keys.CLASSES, pred_keys.ALL_CLASS_IDS,
            pred_keys.ALL_CLASSES
        ]

        if keys:
            base_head.check_prediction_keys(keys, valid_keys)
        else:
            keys = valid_keys
        logits = base_head.check_logits_final_dim(logits,
                                                  self.logits_dimension)
        predictions = {}
        with ops.name_scope('predictions', values=(logits, )):
            if pred_keys.LOGITS in keys:
                predictions[pred_keys.LOGITS] = logits
            if pred_keys.LOGISTIC in keys:
                logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
                predictions[pred_keys.LOGISTIC] = logistic
            two_class_logits = array_ops.concat(
                (array_ops.zeros_like(logits), logits),
                axis=-1,
                name='two_class_logits')
            if pred_keys.PROBABILITIES in keys:
                probabilities = nn.softmax(two_class_logits,
                                           name=pred_keys.PROBABILITIES)
                predictions[pred_keys.PROBABILITIES] = probabilities
            if pred_keys.CLASS_IDS in keys or pred_keys.CLASSES in keys:
                class_ids = math_ops.argmax(two_class_logits,
                                            axis=-1,
                                            name=pred_keys.CLASS_IDS)
                class_ids = array_ops.expand_dims(class_ids, axis=-1)
                if pred_keys.CLASS_IDS in keys:
                    predictions[pred_keys.CLASS_IDS] = class_ids
                if pred_keys.CLASSES in keys:
                    if self._label_vocabulary is not None:
                        classes = self._class_string_table.lookup(class_ids)
                    else:
                        classes = string_ops.as_string(class_ids,
                                                       name='str_classes')
                    predictions[pred_keys.CLASSES] = classes
            if pred_keys.ALL_CLASS_IDS in keys:
                predictions[pred_keys.ALL_CLASS_IDS] = base_head.all_class_ids(
                    logits, n_classes=2)
            if pred_keys.ALL_CLASSES in keys:
                predictions[pred_keys.ALL_CLASSES] = base_head.all_classes(
                    logits,
                    n_classes=2,
                    label_vocabulary=self._label_vocabulary)
            return predictions
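
# Why padding a zero column works: softmax([0, z]) equals
# [1 - sigmoid(z), sigmoid(z)], so the two_class_logits trick above recovers
# binary probabilities and class ids from a single logit column. Plain-NumPy
# sketch; names are illustrative.
import numpy as np

def _np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def _np_softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

logits = np.array([[-2.0], [0.3], [1.5]])                  # [batch, 1]
two_class = np.concatenate([np.zeros_like(logits), logits], axis=-1)
np.testing.assert_allclose(_np_softmax(two_class)[:, 1:],
                           _np_sigmoid(logits), rtol=1e-6)
class_ids = np.argmax(two_class, axis=-1)                  # same as logits > 0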
Example #40
def get_logits_and_prob(logits=None,
                        p=None,
                        multidimensional=False,
                        validate_args=False,
                        name="GetLogitsAndProb"):
    """Converts logits to probabilities and vice-versa, and returns both.

  Args:
    logits: Numeric `Tensor` representing log-odds.
    p: Numeric `Tensor` representing probabilities.
    multidimensional: `Boolean`, default `False`.
      If `True`, `logits` and `p` are treated as `[N1, N2, ..., k]`-shaped
      tensors whose last dimension represents the logits / probabilities of
      `k` classes. For `p`, this will additionally assert that the values in
      the last dimension sum to one.

      If `False`, this will instead assert that each value of `p` is in
      `[0, 1]`, and will do nothing to `logits`.
    validate_args: `Boolean`, default `False`.  Whether to assert `0 <= p <= 1`
      if multidimensional is `False`, otherwise that the last dimension of `p`
      sums to one.
    name: A name for this operation (optional).

  Returns:
    Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`, then
    the corresponding entry in the returned logits will be `-Inf` and `Inf`
    respectively.

  Raises:
    ValueError: if neither `p` nor `logits` were passed in, or both were.
  """
    with ops.name_scope(name, values=[p, logits]):
        if p is None and logits is None:
            raise ValueError("Must pass p or logits.")
        elif p is not None and logits is not None:
            raise ValueError("Must pass either p or logits, not both.")
        elif p is None:
            logits = array_ops.identity(logits, name="logits")
            with ops.name_scope("p"):
                if multidimensional:
                    p = nn.softmax(logits)
                else:
                    p = math_ops.sigmoid(logits)
        elif logits is None:
            with ops.name_scope("p"):
                p = array_ops.identity(p)
                if validate_args:
                    one = constant_op.constant(1., p.dtype)
                    dependencies = [check_ops.assert_non_negative(p)]
                    if multidimensional:
                        dependencies += [
                            assert_close(math_ops.reduce_sum(
                                p, reduction_indices=[-1]),
                                         one,
                                         message="p does not sum to 1.")
                        ]
                    else:
                        dependencies += [
                            check_ops.assert_less_equal(
                                p,
                                one,
                                message="p has components greater than 1.")
                        ]
                    p = control_flow_ops.with_dependencies(dependencies, p)
            with ops.name_scope("logits"):
                if multidimensional:
                    # In the multidimensional case we follow the TF convention
                    # and return log(p) directly, rather than subtracting a
                    # pivot class as in logits = log(p) - log(gather(p, pivot)).
                    # A side effect is that the unidimensional case implicitly
                    # handles the second (complementary) class, while the
                    # multidimensional case explicitly keeps the pivot
                    # dimension.
                    logits = math_ops.log(p)
                else:
                    logits = math_ops.log(p) - math_ops.log(1. - p)
        return (logits, p)
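
# Why log(p) is an acceptable multidimensional logit even though no pivot
# class is subtracted: softmax is shift-invariant, so softmax(log(p)) recovers
# p exactly, and subtracting any pivot gives the same probabilities.
# Plain-NumPy sketch, not part of the snippet itself.
import numpy as np

def _np_softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

p = np.array([0.2, 0.3, 0.5])
logits = np.log(p)                                         # no pivot subtracted
np.testing.assert_allclose(_np_softmax(logits), p, rtol=1e-6)
np.testing.assert_allclose(_np_softmax(logits - logits[-1]), p, rtol=1e-6)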
Example #41
 def _swish(input_tensor, scale):
   custom = op_hint.OpHint("cool_activation")
   input_tensor, scale = custom.add_inputs(input_tensor, scale)
   output = math_ops.sigmoid(input_tensor) * input_tensor * scale
   output, = custom.add_outputs(output)
   return output
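
# The hinted subgraph computes a scaled swish, sigmoid(x) * x * scale; the
# OpHint wrapper marks inputs and outputs for later fused conversion and does
# not change the math. Plain-NumPy equivalent for reference (illustrative).
import numpy as np

def np_swish_scaled(x, scale):
    return (1.0 / (1.0 + np.exp(-x))) * x * scale

print(np_swish_scaled(np.array([-1.0, 0.0, 2.0]), scale=1.5))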
Example #42
 def _entropy(self):
     return (-self.logits * (math_ops.sigmoid(self.logits) - 1) +
             nn.softplus(-self.logits))
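
# The expression above is the Bernoulli entropy written in terms of logits.
# With p = sigmoid(z), -p*log(p) - (1-p)*log(1-p) simplifies to
# -z*(sigmoid(z) - 1) + softplus(-z), which is what the code returns.
# Plain-NumPy check of the identity (illustrative).
import numpy as np

def _np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

z = np.array([-3.0, -0.5, 0.0, 2.0])
p = _np_sigmoid(z)
direct = -p * np.log(p) - (1.0 - p) * np.log(1.0 - p)
via_logits = -z * (_np_sigmoid(z) - 1.0) + np.logaddexp(0.0, -z)  # softplus(-z)
np.testing.assert_allclose(via_logits, direct, rtol=1e-6)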
Example #43
 def _forward(self, x):
     return math_ops.sigmoid(x)
Example #44
inputs = tf.concat(1, [state, word])

with tf.variable_scope("Gates"):  # Reset gate and update gate.
    # Gate biases are initialized to 0.0 here; a bias of 1.0 would make the
    # gates initially neither reset nor update.
    W_reset = tf.get_variable(name="reset_weight", shape=[state_size+input_size, state_size], \
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    W_update = tf.get_variable(name="update_weight", shape=[state_size+input_size, state_size], \
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    b_reset = tf.get_variable(name="reset_bias",
                              shape=[state_size],
                              initializer=tf.constant_initializer(0.0))
    b_update = tf.get_variable(name="update_bias",
                               shape=[state_size],
                               initializer=tf.constant_initializer(0.0))

    reset = sigmoid(tf.matmul(inputs, W_reset) + b_reset)
    update = sigmoid(tf.matmul(inputs, W_update) + b_update)

with tf.variable_scope("Candidate"):
    W_candidate = tf.get_variable(name="candidate_weight", shape=[state_size+input_size, state_size], \
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    b_candidate = tf.get_variable(name="candidate_bias", shape=[state_size], \
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))

    reset_input = tf.concat(1, [reset * state, word])
    candidate = tanh(tf.matmul(reset_input, W_candidate) + b_candidate)

new_h = update * state + (1 - update) * candidate

### WORKS!!!
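
# The same single GRU step condensed into plain NumPy, with the candidate term
# using its own weight matrix (W_candidate). This is an illustrative sketch;
# the names mirror the variables above but are assumptions here.
import numpy as np

def _np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(state, word, W_reset, b_reset, W_update, b_update,
             W_candidate, b_candidate):
    inputs = np.concatenate([state, word], axis=1)
    reset = _np_sigmoid(inputs @ W_reset + b_reset)
    update = _np_sigmoid(inputs @ W_update + b_update)
    reset_input = np.concatenate([reset * state, word], axis=1)
    candidate = np.tanh(reset_input @ W_candidate + b_candidate)
    return update * state + (1.0 - update) * candidate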
Example #45
    def __call__(self, inputs, state, scope=None):
        """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x input_size.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.
      scope: VariableScope for the created subgraph; defaults to "LSTMCell".

    Returns:
      A tuple containing:
      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
    """
        num_proj = self._num_units if self._num_proj is None else self._num_proj

        if self._state_is_tuple:
            (c_prev, m_prev) = state
        else:
            c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
            m_prev = array_ops.slice(state, [0, self._num_units],
                                     [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                "Could not infer input size from inputs.get_shape()[-1]")
        with vs.variable_scope(scope or type(self).__name__,
                               initializer=self._initializer):  # "LSTMCell"
            concat_w = _get_concat_variable(
                "W", [input_size.value + num_proj, 4 * self._num_units], dtype,
                self._num_unit_shards)

            b = vs.get_variable("B",
                                shape=[4 * self._num_units],
                                initializer=array_ops.zeros_initializer,
                                dtype=dtype)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            cell_inputs = array_ops.concat(1, [inputs, m_prev])
            lstm_matrix = nn_ops.bias_add(
                math_ops.matmul(cell_inputs, concat_w), b)
            i, j, f, o = array_ops.split(1, 4, lstm_matrix)

            # Diagonal connections
            if self._use_peepholes:
                w_f_diag = vs.get_variable("W_F_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_i_diag = vs.get_variable("W_I_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)
                w_o_diag = vs.get_variable("W_O_diag",
                                           shape=[self._num_units],
                                           dtype=dtype)

            if self._use_peepholes:
                c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) *
                     c_prev +
                     sigmoid(i + w_i_diag * c_prev) * self._activation(j))
            else:
                c = (sigmoid(f + self._forget_bias) * c_prev +
                     sigmoid(i) * self._activation(j))

            if self._cell_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                c = clip_ops.clip_by_value(c, -self._cell_clip,
                                           self._cell_clip)
                # pylint: enable=invalid-unary-operand-type

            if self._use_peepholes:
                m = sigmoid(o + w_o_diag * c) * self._activation(c)
            else:
                m = sigmoid(o) * self._activation(c)

            if self._num_proj is not None:
                concat_w_proj = _get_concat_variable(
                    "W_P", [self._num_units, self._num_proj], dtype,
                    self._num_proj_shards)

                m = math_ops.matmul(m, concat_w_proj)
                if self._proj_clip is not None:
                    # pylint: disable=invalid-unary-operand-type
                    m = clip_ops.clip_by_value(m, -self._proj_clip,
                                               self._proj_clip)
                    # pylint: enable=invalid-unary-operand-type

        new_state = (LSTMStateTuple(c, m)
                     if self._state_is_tuple else array_ops.concat(1, [c, m]))
        return m, new_state
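
# A sketch of how a cell with the same options (peepholes, cell/projection
# clipping, num_proj) is typically driven in TF 1.x graph mode. The stock
# tf.nn.rnn_cell.LSTMCell is used here purely for illustration; the shapes and
# hyperparameters are assumptions, not taken from the snippet above.
import tensorflow as tf  # TF 1.x graph mode assumed

inputs = tf.placeholder(tf.float32, [32, 50, 128])  # [batch, time, input_size]
cell = tf.nn.rnn_cell.LSTMCell(
    num_units=256,
    use_peepholes=True,   # enables the diagonal W_{F,I,O}_diag connections
    cell_clip=3.0,
    num_proj=64,          # projects m from 256 units down to 64
    proj_clip=3.0,
    state_is_tuple=True)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
# outputs: [32, 50, 64]; final_state: LSTMStateTuple(c=[32, 256], h=[32, 64])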