def testGain(self):
    """Check that `gain` only rescales the orthogonal initializer's output.

    For each dtype, two initializers sharing the same seed are evaluated,
    one with the default gain and one with gain 3.14; dividing the second
    result by 3.14 must give the first.
    """
    gain = 3.14
    shape = (10, 10)
    for dtype in [dtypes.float32, dtypes.float64]:
        unit_init = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
        scaled_init = init_ops.orthogonal_initializer(
            gain=gain, seed=1, dtype=dtype)
        with self.session(graph=ops.Graph(), use_gpu=True):
            unit_values = unit_init(shape).eval()
            scaled_values = scaled_init(shape).eval()
        self.assertAllClose(unit_values, scaled_values / gain)
def testGain(self):
    """Check that `gain` only rescales the orthogonal initializer's output.

    For each dtype, two initializers sharing the same seed are evaluated,
    one with the default gain and one with gain 3.14; dividing the second
    result by 3.14 must give the first.
    """
    gain = 3.14
    shape = (10, 10)
    for dtype in [dtypes.float32, dtypes.float64]:
        init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
        init2 = init_ops.orthogonal_initializer(
            gain=gain, seed=1, dtype=dtype)
        with self.test_session(graph=ops.Graph(), use_gpu=True):
            t1 = init1(shape).eval()
            t2 = init2(shape).eval()
        # Assert rather than return: a returned boolean is ignored by the
        # test runner, and returning would also skip the remaining dtypes.
        # The default tolerances are used because float32 rounding cannot
        # satisfy a 1e-15 bound.
        self.assertAllClose(t1, t2 / gain)
def testGain(self):
    """Check that `gain` only rescales the orthogonal initializer's output.

    For each dtype, two initializers sharing the same seed are evaluated,
    one with the default gain and one with gain 3.14; dividing the second
    result by 3.14 must give the first.
    """
    gain = 3.14
    shape = (10, 10)
    for dtype in [dtypes.float32, dtypes.float64]:
        init1 = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
        init2 = init_ops.orthogonal_initializer(
            gain=gain, seed=1, dtype=dtype)
        with self.test_session(graph=ops.Graph(), use_gpu=True):
            t1 = init1(shape).eval()
            t2 = init2(shape).eval()
        # Assert rather than return: a returned boolean is ignored by the
        # test runner, and returning would also skip the remaining dtypes.
        # The default tolerances are used because float32 rounding cannot
        # satisfy a 1e-15 bound.
        self.assertAllClose(t1, t2 / gain)
def testGain(self):
    """Verify the `gain` argument acts as a plain multiplicative factor.

    With identical seeds, the gain-3.14 initializer's values divided by
    3.14 must match the default-gain initializer's values for every dtype.
    """
    scale = 3.14
    target_shape = (10, 10)
    for dtype in [dtypes.float32, dtypes.float64]:
        baseline = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
        amplified = init_ops.orthogonal_initializer(
            gain=scale, seed=1, dtype=dtype)
        with self.session(graph=ops.Graph(), use_gpu=True):
            baseline_values = baseline(target_shape).eval()
            amplified_values = amplified(target_shape).eval()
        self.assertAllClose(baseline_values, amplified_values / scale)
def test_variable_creation(self):
    """Create variables from each kind of `value` and verify their contents.

    Covers creation from a numpy array, another variable's name, a
    function, an initializer, a number, a tensor and a tensor's name, then
    checks that every created name resolves to exactly one distinct
    variable.
    """
    np.random.seed(5218)

    # ====== create by numpy array ====== #
    array_value = np.random.rand(12, 8).astype('float32')
    K.variable(value=array_value, dtype='float32', name='x', initialize=True)
    fetched_x = K.eval(K.variable(name='x'))
    self.assertTrue(np.all(fetched_x == array_value))

    # ====== create by Variable name ====== #
    K.variable(value='x', name='z', initialize=True)
    fetched_z = K.eval(K.variable(name='z'))
    self.assertTrue(np.all(fetched_z == array_value))

    # ====== create by function ====== #
    def constant_fill(shape):
        return np.full(shape=shape, fill_value=8)

    from_function = K.variable(value=constant_fill, shape=(12, 18),
                               dtype='float32', name='y', initialize=True)
    expected_fill = np.full(shape=(12, 18), fill_value=8)
    self.assertTrue(np.all(K.eval(from_function) == expected_fill))

    # ====== create by initializer ====== #
    reference = K.eval(
        init_ops.orthogonal_initializer(seed=5218)(shape=(8, 8)))
    from_initializer = K.variable(
        value=init_ops.orthogonal_initializer(seed=5218),
        shape=(8, 8), dtype='float32', name='w', initialize=True)
    self.assertTrue(np.all(K.eval(from_initializer) == reference))

    # ====== create by number ====== #
    K.variable(value=25, shape=(8, 8), dtype='float32', name='a',
               initialize=True)
    total = K.eval(K.variable(name='a')).sum()
    self.assertTrue(total == 25 * 8 * 8)

    # ====== create by tensor ====== #
    constant = tf.constant(value=3, shape=(12, 8), dtype='float32',
                           name='dummy_constant')
    K.variable(value=constant, name='b', initialize=True)
    fetched_b = K.eval(K.variable(name='b'))
    self.assertTrue(np.all(fetched_b == K.eval(constant)))

    # ====== create by Tensor name ====== #
    K.variable(value='dummy_constant', name='c', initialize=True)
    fetched_c = K.eval(K.variable(name='c'))
    self.assertTrue(np.all(fetched_c == K.eval(constant)))

    # ====== check all variable exist ====== #
    all_variables_name = ['x', 'z', 'y', 'w', 'a', 'b', 'c']
    all_variables = []
    for name in all_variables_name:
        matches = K.get_all_variables(name=name)
        assert len(matches) == 1, name
        all_variables.append(matches[0])
    # check no duplicate variables
    unique_count = len(set(all_variables))
    self.assertTrue(unique_count == len(all_variables_name))
def inference(self):
    """Build the stacked ConvLSTM graph and return the prediction tensor.

    The first ConvLSTM layer is built under ``self.devices[0]`` and every
    further layer under ``self.devices[-1]``. Each layer is a
    dropout-wrapped ``Conv2DLSTMCell`` run with ``tf.nn.static_rnn``; its
    outputs go through ``self.batch_norm_layer`` with a tanh activation and
    are multiplied by 2 before being unstacked as the next layer's inputs.

    This method only builds graph ops; it does not create a ``tf.Session``
    (running the graph is left to the caller).

    Returns:
        ``self.y_hat``: the last ``self.out_steps`` entries of the final
        layer's outputs, transposed with ``perm=[1, 0, 2, 3, 4]``.
    """

    def build_layer(step_inputs, channels):
        # One ConvLSTM layer: cell -> dropout -> static_rnn -> batch norm
        # (tanh) -> scale by 2.
        cell = rnn.Conv2DLSTMCell(
            input_shape=self.input_shape,
            output_channels=channels,
            kernel_shape=[3, 3],
            forget_bias=1.0,
            initializers=orthogonal_initializer(),
            name="conv_lstm_cell_{}".format(channels))
        cell = DropoutWrapper(cell,
                              input_keep_prob=self.keep_rate,
                              output_keep_prob=self.keep_rate,
                              state_keep_prob=self.keep_rate)
        layer_outputs, _ = tf.nn.static_rnn(cell, step_inputs,
                                            dtype=tf.float32)
        scope = "activation_batch_norm_{}".format(channels)
        layer_outputs = self.batch_norm_layer(layer_outputs, scope=scope,
                                              activation_fn=tf.nn.tanh)
        return 2 * layer_outputs

    with tf.device(self.devices[0]):
        # NOTE(review): the weighted / normalised input computed in the next
        # three lines is never consumed -- the unstack below reads `self.x`
        # directly. The ops are kept so the graph is unchanged; confirm
        # whether `tf.unstack` was meant to take `x` instead.
        x = self.x * self.pw
        x = self.batch_norm_layer(x, scope="pw_input")
        x = 2 * tf.tanh(x)
        x = tf.unstack(self.x, self.in_steps, 1)
        outputs = build_layer(x, self.filters[0])
        x = tf.unstack(outputs, self.in_steps, 0)

    with tf.device(self.devices[-1]):
        for num_filters in self.filters[1:]:
            outputs = build_layer(x, num_filters)
            x = tf.unstack(outputs, self.in_steps, 0)

    # Keep only the trailing `out_steps` entries along the first axis.
    outputs = outputs[-self.out_steps:]
    self.y_hat = tf.transpose(outputs, perm=[1, 0, 2, 3, 4])
    return self.y_hat
def forward_pass(self, x):
    """Run `x` through the stacked MTConv2DLSTM layers.

    `x` is unstacked along axis 1 into `self.in_steps` pieces. Layer `i`
    (1-based) is built inside variable scope ``filter_<i>``; its cell takes
    the previous layer's channel count (the last entry of
    `self.input_shape` for the first layer) and emits ``self.filters[i-1]``
    channels. Each layer's outputs pass through `self._dense` and
    `self._batch_norm` and are unstacked along axis 0 for the next layer.

    Args:
        x: input tensor to unstack along axis 1.

    Returns:
        The last `self.out_steps` entries of the final layer's outputs,
        transposed with ``perm=[1, 0, 2, 3, 4]``.
    """
    step_inputs = tf.unstack(x, self.in_steps, 1)
    channels = [self.input_shape[-1]] + self.filters
    layer_pairs = zip(channels, channels[1:])
    for layer_idx, (in_channels, out_channels) in enumerate(layer_pairs,
                                                            start=1):
        with tf.variable_scope('filter_{}'.format(layer_idx)):
            layer_input_shape = self.input_shape[:-1] + [in_channels]
            cell = MTConv2DLSTMCell(
                input_shape=layer_input_shape,
                output_channels=out_channels,
                kernel_shape=[3, 3],
                forget_bias=1.0,
                initializers=orthogonal_initializer(),
                name="mt_conv_lstm_cell_{}".format(layer_idx))
            outputs, _ = static_rnn(cell, step_inputs, dtype=tf.float32)
            outputs = self._dense(outputs)
            outputs = self._batch_norm(outputs)
            step_inputs = tf.unstack(outputs, self.in_steps, 0)
    tail = outputs[-self.out_steps:]
    return tf.transpose(tail, perm=[1, 0, 2, 3, 4])
def _create_loss(self):
    '''
    Build the risk-estimation loss.

    The network output is the planned position to hold until the next day.
    With self.y the next-day change rate, two kinds of money are lost:
    -self.y * self.position is the trading loss, and cost * self.position
    is a constant loss (tax, or the missed profit of holding national debt
    instead). The loss is therefore
        100 * (-self.y * self.position + cost * self.position)
        = -100 * ((self.y - cost) * self.position)

    Sets self.position, self.avg_position and self.loss.
    :return: None
    '''
    step_inputs = tf.unstack(self.x, self.step, 1)

    cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0,
                        initializer=orthogonal_initializer())
    cell = DropoutWrapper(cell,
                          input_keep_prob=self.keep_rate,
                          output_keep_prob=self.keep_rate,
                          state_keep_prob=self.keep_rate)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Project the last step's output, then normalise it.
    last_output = step_outputs[-1]
    signal = tf.matmul(last_output, self.weights['out']) + self.biases['out']
    norm_signal = self.batch_norm_layer(signal,
                                        scope="activation_batch_norm")

    # relu6 limits the signal to [0, 6]; dividing by 6 maps it to [0, 1].
    self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
    self.avg_position = tf.reduce_mean(self.position)

    excess_return = self.y - self.cost
    estimated_risk = tf.multiply(excess_return, self.position,
                                 name="estimated_risk")
    self.loss = -100. * tf.reduce_mean(estimated_risk)
def forward_pass(self, x):
    """Run `x` through the stacked, dropout-wrapped ConvLSTM layers.

    `x` is unstacked along axis 1 into `self.in_steps` pieces. Layer `i`
    (1-based) takes the previous layer's channel count (the last entry of
    `self.input_shape` for the first layer) and emits ``self.filters[i-1]``
    channels. Each layer's outputs pass through `self._batch_norm` and are
    unstacked along axis 0 for the next layer.

    Args:
        x: input tensor to unstack along axis 1.

    Returns:
        The last `self.out_steps` entries of the final layer's outputs,
        transposed with ``perm=[1, 0, 2, 3, 4]``.
    """
    step_inputs = tf.unstack(x, self.in_steps, 1)
    channels = [self.input_shape[-1]] + self.filters
    layer_pairs = zip(channels, channels[1:])
    for layer_idx, (in_channels, out_channels) in enumerate(layer_pairs,
                                                            start=1):
        layer_input_shape = self.input_shape[:-1] + [in_channels]
        cell = rnn.Conv2DLSTMCell(
            input_shape=layer_input_shape,
            output_channels=out_channels,
            kernel_shape=[3, 3],
            forget_bias=1.0,
            initializers=orthogonal_initializer(),
            name="conv_lstm_cell_{}".format(layer_idx))
        cell = DropoutWrapper(cell,
                              input_keep_prob=self.keep_rate,
                              output_keep_prob=self.keep_rate,
                              state_keep_prob=self.keep_rate)
        outputs, _ = tf.nn.static_rnn(cell, step_inputs, dtype=tf.float32)
        outputs = self._batch_norm(outputs)
        step_inputs = tf.unstack(outputs, self.in_steps, 0)
    tail = outputs[-self.out_steps:]
    return tf.transpose(tail, perm=[1, 0, 2, 3, 4])
def inference(self):
    """Build the stacked MTConv2DLSTM graph and return the prediction.

    Every layer lives in its own variable scope (``filter_0``,
    ``filter_1``, ...) and applies the same pipeline:
    ``MTConv2DLSTMCell`` via ``static_rnn`` -> ``self._dense`` ->
    ``self.batch_norm_layer`` (tanh) -> unstack along axis 0 for the next
    layer.

    Returns:
        ``self.y_hat``: the last ``self.out_steps`` per-step tensors of the
        final layer, transposed with ``perm=[1, 0, 2, 3, 4]``.
    """
    first_channels = self.filters[0]
    with tf.variable_scope('filter_0'):
        print('filter:', first_channels)
        x = tf.unstack(self.x, self.in_steps, 1)
        cell = MTConv2DLSTMCell(
            input_shape=self.input_shape,
            output_channels=first_channels,
            kernel_shape=[3, 3],
            forget_bias=1.0,
            initializers=orthogonal_initializer(),
            name="conv_lstm_cell_{}".format(first_channels))
        outputs, _ = static_rnn(cell, x, dtype=tf.float32)
        outputs = self._dense(outputs)
        scope = "activation_batch_norm_{}".format(first_channels)
        x = self.batch_norm_layer(outputs, scope=scope,
                                  activation_fn=tf.nn.tanh)
        x = tf.unstack(x, self.in_steps, 0)

    for i, channels in enumerate(self.filters[1:]):
        with tf.variable_scope('filter_{}'.format(i + 1)):
            print('filter:', channels)
            cell = MTConv2DLSTMCell(
                input_shape=self.input_shape,
                output_channels=channels,
                kernel_shape=[3, 3],
                forget_bias=1.0,
                initializers=orthogonal_initializer(),
                name="conv_lstm_cell_{}".format(channels))
            outputs, _ = static_rnn(cell, x, dtype=tf.float32)
            # Same order as layer 0: dense first, then batch norm on the
            # dense output, and feed the normalised result forward. The
            # batch-norm result must not be dropped, otherwise deeper
            # layers would skip normalisation and the tanh activation.
            outputs = self._dense(outputs)
            scope = "activation_batch_norm_{}".format(channels)
            x = self.batch_norm_layer(outputs, scope=scope,
                                      activation_fn=tf.nn.tanh)
            x = tf.unstack(x, self.in_steps, 0)

    # `x` is the list of per-step tensors; keep the trailing `out_steps`.
    outputs = x[-self.out_steps:]
    self.y_hat = tf.transpose(outputs, perm=[1, 0, 2, 3, 4])
    return self.y_hat
def __init__(self, num_units, depth, forget_bias=1.0, state_is_tuple=True,
             use_peepholes=False, activation=None, gate_activation=None,
             cell_activation=None, initializer=None,
             input_gate_initializer=None, use_bias=True, reuse=None,
             name=None):
    """Set up the configuration of a nested LSTM (NLSTM) cell.

    Args:
      num_units: `int`, number of hidden units of each cell state and
        hidden state.
      depth: `int`, number of layers in the nest.
      forget_bias: `float`, bias added to the forget gates.
      state_is_tuple: If `True`, accepted and returned states are tuples of
        the `h_state` and `c_state`s. If `False`, they are concatenated
        along the column axis; a warning is logged because this mode is
        slower and slated for deprecation.
      use_peepholes: `bool` (optional).
      activation: Activation function of the update values, including new
        inputs and new cell states. Default: `tanh`.
      gate_activation: Activation function of the gates, including the
        input, output, and forget gate. Default: `sigmoid`.
      cell_activation: Activation function of the first cell gate.
        Default: `identity`. Note that in the paper only the first
        cell_activation is identity.
      initializer: Initializer of the kernel.
        Default: `orthogonal_initializer`.
      input_gate_initializer: Initializer of the input gates.
        Default: `glorot_normal_initializer`.
      use_bias: `bool`. Default: `True`.
      reuse: `bool` (optional), whether to reuse variables in an existing
        scope. If not `True`, and the existing scope already has the given
        variables, an error is raised.
      name: `str`, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes `reuse=True` is required in
        such cases.
    """
    super(NLSTMCell, self).__init__(_reuse=reuse, name=name)
    if not state_is_tuple:
        logging.warn(
            "%s: Using a concatenated state is slower and will soon be "
            "deprecated. Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    # Plain configuration values.
    self._num_units = num_units
    self._depth = depth
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._use_peepholes = use_peepholes
    self._use_bias = use_bias

    # Activations, falling back to the documented defaults.
    self._activation = activation if activation else math_ops.tanh
    self._gate_activation = (
        gate_activation if gate_activation else math_ops.sigmoid)
    self._cell_activation = (
        cell_activation if cell_activation else array_ops.identity)

    # Initializers, falling back to the documented defaults.
    self._initializer = (
        initializer if initializer else init_ops.orthogonal_initializer())
    self._input_gate_initializer = (
        input_gate_initializer if input_gate_initializer
        else init_ops.glorot_normal_initializer())

    # Variables are not created here.
    self._kernels = None
    self._biases = None
    self.built = False
def testShapesValues(self):
    """Check output shape and orthogonality for several dtypes and shapes.

    The generated tensor is flattened to 2-D (all leading axes merged);
    the Gram matrix taken over its smaller dimension must equal identity.
    """
    shapes = [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40), (40, 50)]
    for dtype in [dtypes.float32, dtypes.float64]:
        for shape in shapes:
            init = init_ops.orthogonal_initializer(dtype=dtype)
            # float32 cannot meet the tighter float64 tolerance.
            if dtype == dtypes.float32:
                tol = 1e-5
            else:
                tol = 1e-12
            with self.test_session(graph=ops.Graph(), use_gpu=True):
                # Check the shape
                values = init(shape).eval()
                self.assertAllEqual(shape, values.shape)
                # Check orthogonality by computing the inner product
                flat = values.reshape(
                    (np.prod(values.shape[:-1]), values.shape[-1]))
                rows, cols = flat.shape
                if rows > cols:
                    gram, size = np.dot(flat.T, flat), cols
                else:
                    gram, size = np.dot(flat, flat.T), rows
                self.assertAllClose(gram, np.eye(size), rtol=tol, atol=tol)
def testShapesValues(self):
    """Check output shape and orthogonality for several dtypes and shapes.

    The generated tensor is flattened to 2-D (all leading axes merged);
    the Gram matrix taken over its smaller dimension must equal identity.
    """
    candidate_shapes = [(10, 10), (10, 9, 8), (100, 5, 5), (50, 40),
                        (40, 50)]
    for dtype in [dtypes.float32, dtypes.float64]:
        for shape in candidate_shapes:
            initializer = init_ops.orthogonal_initializer(dtype=dtype)
            # float32 cannot meet the tighter float64 tolerance.
            tolerance = 1e-12
            if dtype == dtypes.float32:
                tolerance = 1e-5
            with self.test_session(graph=ops.Graph(), use_gpu=True):
                # Check the shape
                tensor = initializer(shape).eval()
                self.assertAllEqual(shape, tensor.shape)
                # Check orthogonality by computing the inner product
                matrix = tensor.reshape(
                    (np.prod(tensor.shape[:-1]), tensor.shape[-1]))
                n_rows, n_cols = matrix.shape
                if n_rows > n_cols:
                    product = np.dot(matrix.T, matrix)
                    identity = np.eye(n_cols)
                else:
                    product = np.dot(matrix, matrix.T)
                    identity = np.eye(n_rows)
                self.assertAllClose(product, identity,
                                    rtol=tolerance, atol=tolerance)
def set_variable(self, inputs_shape, gate):
    """Create the input, hidden and cell kernels plus the bias of one gate.

    The configured initializers (`self._input_initializer`,
    `self._hide_initializer`, `self._cell_initializer`) and the bias flag
    (`self._bias`) are only read, or filled with a default when `None`.
    They are never replaced by the created variables, so the method can be
    called once per gate with consistent results.

    Args:
        inputs_shape: shape of the cell inputs; `inputs_shape[1].value` is
            used as the input depth.
        gate: string prefix prepended to every variable name created here.

    Returns:
        Tuple `(input_kernel, hide_kernel, cell_kernel, bias)` of the
        variables created for this gate.

    Raises:
        ValueError: if `inputs_shape[1].value` is `None`.
    """
    if inputs_shape[1].value is None:
        raise ValueError(
            "Expected inputs.shape[-1] to be known, saw shape: %s"
            % inputs_shape)
    input_depth = inputs_shape[1].value
    square_shape = [self._num_units, self._num_units]

    # Input weights.
    if self._input_initializer is None:
        self._input_initializer = init_ops.random_normal_initializer(
            mean=0.0, stddev=0.001)
    self._input_kernel = self.add_variable(
        gate + "input_kernel",
        shape=[input_depth, self._num_units],
        initializer=self._input_initializer)

    # Hidden-state weights: honour a configured `_hide_initializer` and
    # only fall back to the default when none was given.
    if self._hide_initializer is None:
        self._hide_initializer = init_ops.random_normal_initializer(
            mean=0.0, stddev=0.001)
    # Mirror under the legacy attribute name for any code still reading it.
    self._recurrent_initializer = self._hide_initializer
    self._hide_kernel = self.add_variable(
        gate + "hide_kernel",
        shape=square_shape,
        initializer=self._hide_initializer)

    # Cell weights: `_cell_initializer` stays an initializer; the created
    # variable is kept in a local so later calls still see the config.
    if self._cell_initializer is None:
        self._cell_initializer = init_ops.orthogonal_initializer()
    # Mirror under the legacy attribute name for any code still reading it.
    self._cell_kernel_initializer = self._cell_initializer
    cell_kernel = self.add_variable(
        gate + "cell_kernel",
        shape=square_shape,
        initializer=self._cell_initializer)

    # Bias: ones when the configured bias is 1.0, zeros otherwise.
    # `self._bias` is left untouched so the comparison stays valid for the
    # next gate.
    if self._bias == 1.0:
        bias_initializer = init_ops.ones_initializer(dtype=self.dtype)
    else:
        bias_initializer = init_ops.zeros_initializer(dtype=self.dtype)
    bias = self.add_variable(
        gate + "bias",
        shape=[self._num_units],
        initializer=bias_initializer)

    return self._input_kernel, self._hide_kernel, cell_kernel, bias
def _create_loss(self):
    '''
    Risk-assessment loss function.

    Loss = -100. * mean(P * (R - c))
    P : self.position, the output: the planned position to hold until the
        next day
    R : self.y, the change rate of the next day
    c : cost

    Sets self.position, self.avg_position and self.loss.
    :return: None
    '''
    # Per the original notes: self.x is (batch_size, num_step, input_size)
    # and unstacking along axis 1 yields num_step tensors of
    # (batch_size, input_size).
    step_inputs = tf.unstack(self.x, self.num_step, 1)

    cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0,
                        initializer=orthogonal_initializer())
    cell = DropoutWrapper(cell,
                          input_keep_prob=self.keep_prob,
                          output_keep_prob=self.keep_prob,
                          state_keep_prob=self.keep_prob)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Project the last step's output, then normalise it.
    last_output = step_outputs[-1]
    signal = tf.matmul(last_output, self.weights['out']) + self.biases['out']
    norm_signal = self.batch_norm_layer(signal,
                                        scope="activation_batch_norm")

    # relu6 limits the signal to [0, 6]; dividing by 6 maps it to [0, 1].
    self.position = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
    self.avg_position = tf.reduce_mean(self.position)

    excess_return = self.y - self.cost
    estimated_risk = tf.multiply(excess_return, self.position,
                                 name='estimated_risk')
    self.loss = -100. * tf.reduce_mean(estimated_risk)
def _create_loss(self):
    '''
    Build the LSTM forward graph and store its bounded output.

    Sets self.output to relu6(batch_norm(signal)) / 6, where signal is the
    projected output of the last LSTM step.
    :return: None
    '''
    # Split self.X along axis 1 into self.num_step tensors (per the original
    # notes, each of shape [batch_size, input_size]).
    step_inputs = tf.unstack(self.X, self.num_step, 1)

    # Build the LSTM cell; orthogonal_initializer() initialises the weights
    # with an orthogonal matrix.
    cell = rnn.LSTMCell(self.hidden_size, forget_bias=1.0,
                        initializer=orthogonal_initializer())
    cell = DropoutWrapper(cell,
                          input_keep_prob=self.keep_prob,
                          output_keep_prob=self.keep_prob,
                          state_keep_prob=self.keep_prob)

    # Outputs (and unused states) of the LSTM layer.
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    last_output = step_outputs[-1]
    signal = tf.matmul(last_output, self.weights['out']) + self.biases['out']

    # Normalise the projected LSTM output.
    norm_signal = self.batch_norm_layer(signal,
                                        scope="activation_batch_norm")

    # relu6 limits the signal to [0, 6]; dividing by 6 maps it to [0, 1].
    self.output = tf.nn.relu6(norm_signal, name="relu_limit") / 6.
def testInvalidShape(self):
    """Calling the orthogonal initializer with shape [5] raises ValueError."""
    initializer = init_ops.orthogonal_initializer()
    with self.test_session(graph=ops.Graph(), use_gpu=True):
        with self.assertRaises(ValueError):
            initializer(shape=[5])
def testDuplicatedInitializer(self):
    """`duplicated_initializer` must report False for this initializer."""
    initializer = init_ops.orthogonal_initializer()
    is_duplicated = duplicated_initializer(self, initializer, 1, (10, 10))
    self.assertFalse(is_duplicated)
def testInitializerDifferent(self):
    """Initializers seeded with 1 and 2 must not pass `identicaltest`."""
    shape = (10, 10)
    for dtype in [dtypes.float32, dtypes.float64]:
        seeded_one = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
        seeded_two = init_ops.orthogonal_initializer(seed=2, dtype=dtype)
        are_identical = identicaltest(self, seeded_one, seeded_two, shape)
        self.assertFalse(are_identical)
def testInvalidShape(self):
    """A shape of [5] must be rejected by the orthogonal initializer."""
    orthogonal = init_ops.orthogonal_initializer()
    with self.test_session(graph=ops.Graph(), use_gpu=True):
        with self.assertRaises(ValueError):
            orthogonal(shape=[5])
def testDuplicatedInitializer(self):
    """Assert `duplicated_initializer` returns a falsy result here."""
    orthogonal = init_ops.orthogonal_initializer()
    result = duplicated_initializer(self, orthogonal, 1, (10, 10))
    self.assertFalse(result)
def testInitializerDifferent(self):
    """Different seeds must not be reported identical by `identicaltest`."""
    target_shape = (10, 10)
    for dtype in [dtypes.float32, dtypes.float64]:
        first = init_ops.orthogonal_initializer(seed=1, dtype=dtype)
        second = init_ops.orthogonal_initializer(seed=2, dtype=dtype)
        outcome = identicaltest(self, first, second, target_shape)
        self.assertFalse(outcome)
def _build(self, input_shape):
    """Create the variables of the Popnn RNN.

    May be called manually before `__call__()` or automatically through
    it; once `self.built` is set, further calls return immediately without
    creating anything.

    Args:
      input_shape: a TensorShape object with 3 dimensions.

    Raises:
      ValueError: if input_shape does not have 3 dimensions or its last
        dimension is unknown.
    """
    if self.built:
        return

    shape = tensor_shape.TensorShape(input_shape)
    if shape.ndims != 3:
        raise ValueError("Expecting input_shape with 3 dims, got %d" %
                         shape.ndims)
    dims = shape.as_list()
    feature_dim = dims[-1]
    if feature_dim is None:
        raise ValueError("The last dimension of the inputs to `_PopnnRNN` "
                         "should be defined. Found `None`.")
    self._input_size = feature_dim

    # Fill in default initializers, then resolve all of them through
    # `initializers.get`.
    if self._kernel_initializer is None:
        self._kernel_initializer = init_ops.glorot_uniform_initializer(
            self._seed)
    if self._recurrent_initializer is None:
        self._recurrent_initializer = init_ops.orthogonal_initializer(
            self._seed)
    if self._bias_initializer is None:
        self._bias_initializer = init_ops.zeros_initializer()
    self._kernel_initializer = initializers.get(self._kernel_initializer)
    self._recurrent_initializer = initializers.get(
        self._recurrent_initializer)
    self._bias_initializer = initializers.get(self._bias_initializer)

    # Input weight tensor: the canonical shape with `num_units` removed
    # from its first dimension.
    input_kernel_shape = self.canonical_weight_shape
    input_kernel_shape[0] -= self.num_units
    self.kernel = self.add_weight("kernel",
                                  dtype=self._plain_dtype,
                                  initializer=self._kernel_initializer,
                                  shape=input_kernel_shape)

    # Recurrent weight tensor: the canonical shape with its first
    # dimension set to `num_units`.
    hidden_kernel_shape = self.canonical_weight_shape
    hidden_kernel_shape[0] = self.num_units
    self.recurrent_kernel = self.add_weight(
        "recurrent_kernel",
        dtype=self._plain_dtype,
        initializer=self._recurrent_initializer,
        shape=hidden_kernel_shape)

    self.biases = self.get_bias()

    # Stateful mode keeps one zero-initialised state per state shape.
    self.states = []
    if self._stateful:
        batch_axis = 1 if self._time_major else 0
        state_shapes = self.state_shape(dims[batch_axis])
        if not isinstance(state_shapes, tuple):
            state_shapes = (state_shapes,)
        for state_shape in state_shapes:
            self.states.append(K.zeros(state_shape))

    self.built = True