Example No. 1
 def testGain(self):
   shape = (10, 10)
   for dtype in [dtypes.float32, dtypes.float64]:
     init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
     init2 = init_ops.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
     with self.session(graph=ops.Graph(), use_gpu=True):
       t1 = init1(shape).eval()
       t2 = init2(shape).eval()
     self.assertAllClose(t1, t2 / 3.14)
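The test above relies on the gain argument doing nothing more than rescaling the orthogonal matrix generated for a given seed. A minimal standalone sketch of the same property using the TF 2 Keras initializer (tf.keras.initializers.Orthogonal; shape and gain values are illustrative):

    import numpy as np
    import tensorflow as tf

    # Two initializers with the same seed but different gain produce the
    # same orthogonal matrix up to a scalar factor.
    w1 = tf.keras.initializers.Orthogonal(seed=1)((10, 10)).numpy()
    w2 = tf.keras.initializers.Orthogonal(gain=3.14, seed=1)((10, 10)).numpy()
    print(np.allclose(w1, w2 / 3.14))  # True (up to float rounding)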
Example No. 2
 def testGain(self):
   shape = (10, 10)
   for dtype in [dtypes.float32, dtypes.float64]:
     init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
     init2 = init_ops.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
     with self.test_session(graph=ops.Graph(), use_gpu=True):
       t1 = init1(shape).eval()
       t2 = init2(shape).eval()
     return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
Example No. 3
 def testGain(self):
   shape = (10, 10)
   for dtype in [dtypes.float32, dtypes.float64]:
     init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
     init2 = init_ops.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
     with self.test_session(graph=ops.Graph(), use_gpu=True):
       t1 = init1(shape).eval()
       t2 = init2(shape).eval()
     return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15)
Example No. 4
 def testGain(self):
   shape = (10, 10)
   for dtype in [dtypes.float32, dtypes.float64]:
     init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
     init2 = init_ops.orthogonal_initializer(gain=3.14, seed=1, dtype=dtype)
     with self.session(graph=ops.Graph(), use_gpu=True):
       t1 = init1(shape).eval()
       t2 = init2(shape).eval()
     self.assertAllClose(t1, t2 / 3.14)
Example No. 5
    def test_variable_creation(self):
        np.random.seed(5218)
        # ====== create by numpy array ====== #
        tmp = np.random.rand(12, 8).astype('float32')
        K.variable(value=tmp, dtype='float32', name='x', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='x')) == tmp))
        # ====== create by Variable name ====== #
        K.variable(value='x', name='z', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='z')) == tmp))

        # ====== create by function ====== #

        def fn(shape):
            return np.full(shape=shape, fill_value=8)

        y = K.variable(value=fn,
                       shape=(12, 18),
                       dtype='float32',
                       name='y',
                       initialize=True)
        self.assertTrue(
            np.all(K.eval(y) == np.full(shape=(12, 18), fill_value=8)))
        # ====== create by initializer ====== #
        tmp = K.eval(init_ops.orthogonal_initializer(seed=5218)(shape=(8, 8)))
        w = K.variable(value=init_ops.orthogonal_initializer(seed=5218),
                       shape=(8, 8),
                       dtype='float32',
                       name='w',
                       initialize=True)
        self.assertTrue(np.all(K.eval(w) == tmp))
        # ====== create by number ====== #
        K.variable(value=25,
                   shape=(8, 8),
                   dtype='float32',
                   name='a',
                   initialize=True)
        self.assertTrue(K.eval(K.variable(name='a')).sum() == 25 * 8 * 8)
        # ====== create by tensor ====== #
        t = tf.constant(value=3,
                        shape=(12, 8),
                        dtype='float32',
                        name='dummy_constant')
        K.variable(value=t, name='b', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='b')) == K.eval(t)))
        # ====== create by Tensor name ====== #
        K.variable(value='dummy_constant', name='c', initialize=True)
        self.assertTrue(np.all(K.eval(K.variable(name='c')) == K.eval(t)))
        # ====== check all variable exist ====== #
        all_variables = []
        all_variables_name = ['x', 'z', 'y', 'w', 'a', 'b', 'c']
        for name in all_variables_name:
            v = K.get_all_variables(name=name)
            assert len(v) == 1, name
            all_variables.append(v[0])
        # check no duplicate variables
        self.assertTrue(len(set(all_variables)) == len(all_variables_name))
Example No. 6
    def inference(self):
        with tf.device(self.devices[0]):
            x = self.x * self.pw
            x = self.batch_norm_layer(x, scope="pw_input")
            x = 2 * tf.tanh(x)
            x = tf.unstack(self.x, self.in_steps, 1)
            convLSTM2D_cell = rnn.Conv2DLSTMCell(
                input_shape=self.input_shape,
                output_channels=self.filters[0],
                kernel_shape=[3, 3],
                forget_bias=1.0,
                initializers=orthogonal_initializer(),
                name="conv_lstm_cell_{}".format(self.filters[0]))
            dropout_cell = DropoutWrapper(convLSTM2D_cell,
                                          input_keep_prob=self.keep_rate,
                                          output_keep_prob=self.keep_rate,
                                          state_keep_prob=self.keep_rate)
            outputs, states = tf.nn.static_rnn(dropout_cell,
                                               x,
                                               dtype=tf.float32)
            scope = "activation_batch_norm_{}".format(self.filters[0])
            outputs = self.batch_norm_layer(outputs,
                                            scope=scope,
                                            activation_fn=tf.nn.tanh)
            outputs = 2 * outputs
            x = tf.unstack(outputs, self.in_steps, 0)

        with tf.device(self.devices[-1]):
            for filter in self.filters[1:]:
                convLSTM2D_cell = rnn.Conv2DLSTMCell(
                    input_shape=self.input_shape,
                    output_channels=filter,
                    kernel_shape=[3, 3],
                    forget_bias=1.0,
                    initializers=orthogonal_initializer(),
                    name="conv_lstm_cell_{}".format(filter))
                dropout_cell = DropoutWrapper(convLSTM2D_cell,
                                              input_keep_prob=self.keep_rate,
                                              output_keep_prob=self.keep_rate,
                                              state_keep_prob=self.keep_rate)
                outputs, states = tf.nn.static_rnn(dropout_cell,
                                                   x,
                                                   dtype=tf.float32)
                scope = "activation_batch_norm_{}".format(filter)
                outputs = self.batch_norm_layer(outputs,
                                                scope=scope,
                                                activation_fn=tf.nn.tanh)
                outputs = 2 * outputs
                x = tf.unstack(outputs, self.in_steps, 0)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

        outputs = outputs[-self.out_steps:]
        self.y_hat = tf.transpose(outputs, perm=[1, 0, 2, 3, 4])
        return self.y_hat
Example No. 7
    def forward_pass(self, x):
        x = tf.unstack(x, self.in_steps, 1)
        filters = [self.input_shape[-1]] + self.filters
        for i in range(1, len(filters)):
            with tf.variable_scope('filter_{}'.format(i)):
                filter = filters[i]
                input_shape = self.input_shape[:-1] + [filters[i - 1]]
                mt_convLSTM2D_cell = MTConv2DLSTMCell(
                    input_shape=input_shape,
                    output_channels=filter,
                    kernel_shape=[3, 3],
                    forget_bias=1.0,
                    initializers=orthogonal_initializer(),
                    name="mt_conv_lstm_cell_{}".format(i))
                outputs, states = static_rnn(mt_convLSTM2D_cell,
                                             x,
                                             dtype=tf.float32)
                outputs = self._dense(outputs)

                outputs = self._batch_norm(outputs)
                x = tf.unstack(outputs, self.in_steps, 0)

        outputs = outputs[-self.out_steps:]
        y_hat = tf.transpose(outputs, perm=[1, 0, 2, 3, 4])
        return y_hat
Example No. 8
 def _create_loss(self):
     '''
     Risk estimation loss function. The output is the planned position we should hold until the next day.
     The next day's change rate is self.y, so we lose money in two ways: -self.y * self.position is the
     trading loss, and cost * self.position is a constant loss due to tax and the missed profit of buying
     national debt instead. Therefore the loss function is formulated as:
     100 * (-self.y * self.position + cost * self.position) = -100 * ((self.y - cost) * self.position)
     :return:
     '''
     # with tf.device("/cpu:0"):
     xx = tf.unstack(self.x, self.step, 1)
     lstm_cell = rnn.LSTMCell(self.hidden_size,
                              forget_bias=1.0,
                              initializer=orthogonal_initializer())
     dropout_cell = DropoutWrapper(lstm_cell,
                                   input_keep_prob=self.keep_rate,
                                   output_keep_prob=self.keep_rate,
                                   state_keep_prob=self.keep_rate)
     outputs, states = rnn.static_rnn(dropout_cell, xx, dtype=tf.float32)
     signal = tf.matmul(outputs[-1],
                        self.weights['out']) + self.biases['out']
     scope = "activation_batch_norm"
     norm_signal = self.batch_norm_layer(signal, scope=scope)
     # batch_norm(signal, 0.9, center=True, scale=True, epsilon=0.001, activation_fn=tf.nn.relu6,
     #           is_training=is_training, scope="activation_batch_norm", reuse=False)
     self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
     self.avg_position = tf.reduce_mean(self.position)
     # self.cost = 0.0002
     self.loss = -100. * tf.reduce_mean(
         tf.multiply(
             (self.y - self.cost), self.position, name="estimated_risk"))
Example No. 9
    def forward_pass(self, x):
        x = tf.unstack(x, self.in_steps, 1)
        filters = [self.input_shape[-1]] + self.filters
        for i in range(1, len(filters)):
            filter = filters[i]
            input_shape = self.input_shape[:-1] + [filters[i - 1]]
            convLSTM2D_cell = rnn.Conv2DLSTMCell(
                input_shape=input_shape,
                output_channels=filter,
                kernel_shape=[3, 3],
                forget_bias=1.0,
                initializers=orthogonal_initializer(),
                name="conv_lstm_cell_{}".format(i))
            dropout_cell = DropoutWrapper(convLSTM2D_cell,
                                          input_keep_prob=self.keep_rate,
                                          output_keep_prob=self.keep_rate,
                                          state_keep_prob=self.keep_rate)
            outputs, states = tf.nn.static_rnn(dropout_cell,
                                               x,
                                               dtype=tf.float32)
            outputs = self._batch_norm(outputs)
            x = tf.unstack(outputs, self.in_steps, 0)

        outputs = outputs[-self.out_steps:]
        y_hat = tf.transpose(outputs, perm=[1, 0, 2, 3, 4])
        return y_hat
Example No. 10
    def inference(self):
        with tf.variable_scope('filter_0'):
            print('filter:', self.filters[0])
            x = tf.unstack(self.x, self.in_steps, 1)
            mt_convLSTM2D_cell = MTConv2DLSTMCell(
                input_shape=self.input_shape,
                output_channels=self.filters[0],
                kernel_shape=[3, 3],
                forget_bias=1.0,
                initializers=orthogonal_initializer(),
                name="conv_lstm_cell_{}".format(self.filters[0]))
            outputs, states = static_rnn(mt_convLSTM2D_cell,
                                         x,
                                         dtype=tf.float32)
            outputs = self._dense(outputs)
            scope = "activation_batch_norm_{}".format(self.filters[0])
            x = self.batch_norm_layer(outputs,
                                      scope=scope,
                                      activation_fn=tf.nn.tanh)
            x = tf.unstack(x, self.in_steps, 0)

        for i, filter in enumerate(self.filters[1:]):
            v_scope = 'filter_{}'.format(i + 1)
            with tf.variable_scope(v_scope):
                print('filter:', filter)
                mt_convLSTM2D_cell = MTConv2DLSTMCell(
                    input_shape=self.input_shape,
                    output_channels=filter,
                    kernel_shape=[3, 3],
                    forget_bias=1.0,
                    initializers=orthogonal_initializer(),
                    name="conv_lstm_cell_{}".format(filter))
                outputs, states = static_rnn(mt_convLSTM2D_cell,
                                             x,
                                             dtype=tf.float32)
                x = self._dense(outputs)
                scope = "activation_batch_norm_{}".format(filter)
                outputs = self.batch_norm_layer(outputs,
                                                scope=scope,
                                                activation_fn=tf.nn.tanh)
                x = tf.unstack(x, self.in_steps, 0)

        outputs = x[-self.out_steps:]
        self.y_hat = tf.transpose(outputs, perm=[1, 0, 2, 3, 4])
        return self.y_hat
Example No. 11
  def __init__(self, num_units, depth, forget_bias=1.0,
               state_is_tuple=True, use_peepholes=False,
               activation=None, gate_activation=None,
               cell_activation=None,
               initializer=None,
               input_gate_initializer=None,
               use_bias=True, reuse=None, name=None):
    """Initialize the basic NLSTM cell.

    Args:
      num_units: `int`, The number of hidden units of each cell state
        and hidden state.
      depth: `int`, The number of layers in the nest.
      forget_bias: `float`, The bias added to forget gates.
      state_is_tuple: If `True`, accepted and returned states are tuples of
        the `h_state` and `c_state`s.  If `False`, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      use_peepholes: `bool`(optional).
      activation: Activation function of the update values,
        including new inputs and new cell states.  Default: `tanh`.
      gate_activation: Activation function of the gates,
        including the input, output, and forget gate. Default: `sigmoid`.
      cell_activation: Activation function of the first cell gate. Default: `identity`.
        Note that in the paper only the first cell_activation is identity.
      initializer: Initializer of kernel. Default: `orthogonal_initializer`.
      input_gate_initializer: Initializer of input gates.
        Default: `glorot_normal_initializer`.
      use_bias: `bool`. Default: `True`.
      reuse: `bool`(optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: `str`, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.
    """
    super(NLSTMCell, self).__init__(_reuse=reuse, name=name)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._use_peepholes = use_peepholes
    self._depth = depth
    self._activation = activation or math_ops.tanh
    self._gate_activation = gate_activation or math_ops.sigmoid
    self._cell_activation = cell_activation or array_ops.identity
    self._initializer = initializer or init_ops.orthogonal_initializer()
    self._input_gate_initializer = (input_gate_initializer 
                                    or init_ops.glorot_normal_initializer())
    self._use_bias = use_bias
    self._kernels = None
    self._biases = None
    self.built = False
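The constructor above defaults the kernel initializer to orthogonal_initializer and the input-gate initializer to glorot_normal_initializer. A minimal usage sketch, assuming the NLSTMCell class defined above is importable in a TF 1.x-style graph:

    from tensorflow.python.ops import init_ops

    # Defaults are applied when no initializers are passed:
    cell = NLSTMCell(num_units=64, depth=2)

    # Equivalent construction with the documented defaults spelled out:
    cell = NLSTMCell(num_units=64, depth=2,
                     initializer=init_ops.orthogonal_initializer(),
                     input_gate_initializer=init_ops.glorot_normal_initializer())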
Example No. 12
 def testShapesValues(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
       init = init_ops.orthogonal_initializer(dtype=dtype)
       tol = 1e-5 if dtype == dtypes.float32 else 1e-12
       with self.test_session(graph=ops.Graph(), use_gpu=True):
         # Check the shape
         t = init(shape).eval()
         self.assertAllEqual(shape, t.shape)
         # Check orthogonality by computing the inner product
         t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
         if t.shape[0] > t.shape[1]:
           self.assertAllClose(
               np.dot(t.T, t), np.eye(t.shape[1]), rtol=tol, atol=tol)
         else:
           self.assertAllClose(
               np.dot(t, t.T), np.eye(t.shape[0]), rtol=tol, atol=tol)
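The orthogonality check above reduces to verifying Q^T Q = I (or Q Q^T = I when the flattened matrix is wide) within a dtype-dependent tolerance. A standalone NumPy sketch of the same check on a matrix with orthonormal columns obtained from a QR decomposition:

    import numpy as np

    rng = np.random.default_rng(0)
    q, _ = np.linalg.qr(rng.normal(size=(50, 40)))       # q: (50, 40), orthonormal columns
    print(np.allclose(q.T @ q, np.eye(40), atol=1e-12))  # True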
Example No. 13
 def testShapesValues(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     for shape in [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]:
       init = init_ops.orthogonal_initializer(dtype=dtype)
       tol = 1e-5 if dtype == dtypes.float32 else 1e-12
       with self.test_session(graph=ops.Graph(), use_gpu=True):
         # Check the shape
         t = init(shape).eval()
         self.assertAllEqual(shape, t.shape)
         # Check orthogonality by computing the inner product
         t = t.reshape((np.prod(t.shape[:-1]), t.shape[-1]))
         if t.shape[0] > t.shape[1]:
           self.assertAllClose(
               np.dot(t.T, t), np.eye(t.shape[1]), rtol=tol, atol=tol)
         else:
           self.assertAllClose(
               np.dot(t, t.T), np.eye(t.shape[0]), rtol=tol, atol=tol)
Example No. 14
    def set_variable(self, inputs_shape, gate):
        if inputs_shape[1].value is None:
            raise ValueError(
                "Expected inputs.shape[-1] to be known, saw shape: %s" %
                inputs_shape)
        input_depth = inputs_shape[1].value
        # input weights

        if self._input_initializer is None:
            self._input_initializer = init_ops.random_normal_initializer(
                mean=0.0, stddev=0.001)
        # input weights variable
        self._input_kernel = self.add_variable(
            gate + "input_kernel",
            shape=[input_depth, self._num_units],
            initializer=self._input_initializer)
        # hide weight
        if self._hide_initializer is None:
            self._recurrent_initializer = init_ops.random_normal_initializer(
                mean=0.0, stddev=0.001)
        self._hide_kernel = self.add_variable(
            gate + "hide_kernel",
            shape=[self._num_units, self._num_units],
            initializer=self._recurrent_initializer)
        # cell weight
        if self._cell_initializer is None:
            self._cell_kernel_initializer = init_ops.orthogonal_initializer()
        self._cell_initializer = self.add_variable(
            gate + "cell_kernel",
            shape=[self._num_units, self._num_units],
            initializer=self._cell_kernel_initializer)

        if self._bias == 1.0:
            self._bias = self.add_variable(
                gate + "bias",
                shape=[self._num_units],
                initializer=init_ops.ones_initializer(dtype=self.dtype))
        else:
            self._bias = self.add_variable(
                gate + "bias",
                shape=[self._num_units],
                initializer=init_ops.zeros_initializer(dtype=self.dtype))
        return self._input_kernel, self._hide_kernel, self._cell_initializer, self._bias
Example No. 15
 def _create_loss(self):
      '''
      Risk estimation loss function:
      Loss = -100. * mean(P * (R - c))
      P : self.position, the output, the planned position we should hold until the next day
      R : self.y, the change rate of the next day
      c : cost
      :return:
      '''
     # self.x.shape = (batch_size, num_step, input_size)
     # xx.shape = (num_step, (batch_size, input_size))
     xx = tf.unstack(self.x, self.num_step, 1)
     lstm_cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0, initializer=orthogonal_initializer())
     dropout_cell = DropoutWrapper(lstm_cell, input_keep_prob=self.keep_prob,
                                   output_keep_prob=self.keep_prob, state_keep_prob=self.keep_prob)
     outputs, states = rnn.static_rnn(dropout_cell, xx, dtype=tf.float32)
     signal = tf.matmul(outputs[-1], self.weights['out']) + self.biases['out']
     scope = "activation_batch_norm"
     norm_signal = self.batch_norm_layer(signal, scope=scope)
     self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
     self.avg_position = tf.reduce_mean(self.position)
     self.loss = -100. * tf.reduce_mean(tf.multiply((self.y - self.cost), self.position, name='estimated_risk'))
Example No. 16
    def _create_loss(self):
        '''
        loss function.
        :return:
        '''
        # # stored in (host) memory
        # with tf.device("/cpu:0"):
        # Unstack along dimension 1 (self.num_step): split tensor self.X into self.num_step tensors of shape [batch_size, input_size]
        XX = tf.unstack(self.X, self.num_step, 1)

        # Build the LSTM cell
        # orthogonal_initializer(): initializer that generates an orthogonal matrix
        lstm_cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0, initializer=orthogonal_initializer())
        dropout_cell = DropoutWrapper(lstm_cell, input_keep_prob=self.keep_prob,
                                      output_keep_prob=self.keep_prob, state_keep_prob=self.keep_prob)
        # Outputs and states of the LSTM layer
        outputs, states = rnn.static_rnn(dropout_cell, XX, dtype=tf.float32)
        signal = tf.matmul(outputs[-1], self.weights['out']) + self.biases['out']
        scope = "activation_batch_norm"
        # Batch-normalize the LSTM output
        norm_signal = self.batch_norm_layer(signal, scope=scope)
        self.output = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
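The comments above describe splitting self.X of shape [batch_size, num_step, input_size] along axis 1 into a Python list of num_step tensors of shape [batch_size, input_size], which is the input format static_rnn expects. A quick shape check (TF 2 eager mode, illustrative sizes):

    import tensorflow as tf

    x = tf.zeros([4, 7, 3])               # [batch_size, num_step, input_size]
    steps = tf.unstack(x, num=7, axis=1)  # list of num_step tensors
    print(len(steps), steps[0].shape)     # 7 (4, 3)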
Example No. 17
 def testInvalidShape(self):
   init1 = init_ops.orthogonal_initializer()
   with self.test_session(graph=ops.Graph(), use_gpu=True):
     self.assertRaises(ValueError, init1, shape=[5])
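Orthogonal initialization is only defined for shapes with at least two dimensions, which is what this test asserts. The same failure can be reproduced with the TF 2 Keras initializer (a sketch, separate from the original test harness):

    import tensorflow as tf

    init = tf.keras.initializers.Orthogonal()
    init((5, 5))      # fine: 2-D shape
    try:
        init((5,))    # rank-1 shape is rejected
    except ValueError as e:
        print(e)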
Example No. 18
 def testDuplicatedInitializer(self):
   init = init_ops.orthogonal_initializer()
   self.assertFalse(duplicated_initializer(self, init, 1, (10, 10)))
Example No. 19
 def testInitializerDifferent(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
     init2 = init_ops.orthogonal_initializer(seed=2, dtype=dtype)
     self.assertFalse(identicaltest(self, init1, init2, (10, 10)))
Example No. 23
    def _build(self, input_shape):
        """Create variables of the Popnn RNN.

    It can be called manually before `__call__()` or automatically through
    `__call__()`. In the former case, any subsequent `__call__()` will skip
    creating variables.

    Args:
      input_shape: a TensorShape object with 3 dimensions.

    Raises:
      ValueError: if input_shape has wrong dimension or unknown 3rd dimension.
    """
        if self.built:
            return

        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape.ndims != 3:
            raise ValueError("Expecting input_shape with 3 dims, got %d" %
                             input_shape.ndims)
        input_shape = input_shape.as_list()
        if input_shape[-1] is None:
            raise ValueError("The last dimension of the inputs to `_PopnnRNN` "
                             "should be defined. Found `None`.")
        self._input_size = input_shape[-1]

        # Create the variables
        if self._kernel_initializer is None:
            self._kernel_initializer = init_ops.glorot_uniform_initializer(
                self._seed)

        if self._recurrent_initializer is None:
            self._recurrent_initializer = init_ops.orthogonal_initializer(
                self._seed)

        if self._bias_initializer is None:
            self._bias_initializer = init_ops.zeros_initializer()

        self._kernel_initializer = initializers.get(self._kernel_initializer)
        self._recurrent_initializer = initializers.get(
            self._recurrent_initializer)
        self._bias_initializer = initializers.get(self._bias_initializer)

        # Initialize the input weight tensor.
        kernel_shape = self.canonical_weight_shape
        kernel_shape[0] -= self.num_units
        self.kernel = self.add_weight("kernel",
                                      dtype=self._plain_dtype,
                                      initializer=self._kernel_initializer,
                                      shape=kernel_shape)

        # Initialize the recurrent weight tensor.
        recurrent_kernel_shape = self.canonical_weight_shape
        recurrent_kernel_shape[0] = self.num_units
        self.recurrent_kernel = self.add_weight(
            "recurrent_kernel",
            dtype=self._plain_dtype,
            initializer=self._recurrent_initializer,
            shape=recurrent_kernel_shape)

        self.biases = self.get_bias()

        self.states = []
        if self._stateful:
            batch_size = input_shape[1 if self._time_major else 0]
            shapes = self.state_shape(batch_size)
            if not isinstance(shapes, tuple):
                shapes = (shapes, )

            for i, shape in enumerate(shapes):
                self.states.append(K.zeros(shape))

        self.built = True
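The defaults chosen in _build (glorot_uniform for the input kernel, orthogonal for the recurrent kernel, zeros for the bias) match the standard Keras recurrent-layer defaults. For comparison, a plain tf.keras LSTM with those defaults written out explicitly:

    import tensorflow as tf

    layer = tf.keras.layers.LSTM(
        units=128,
        kernel_initializer="glorot_uniform",  # input-to-hidden weights
        recurrent_initializer="orthogonal",   # hidden-to-hidden weights
        bias_initializer="zeros")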