def _testElu(self, np_features, use_gpu=False):
  # Compares the TF ELU op against the NumPy reference implementation.
  np_elu = self._npElu(np_features)
  with self.test_session(use_gpu=use_gpu):
    elu = nn_ops.elu(np_features)
    tf_elu = elu.eval()
  self.assertAllClose(np_elu, tf_elu)
  self.assertShapeEqual(np_elu, elu)
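# The snippets in this collection lean on TensorFlow's internal modules; a
# minimal import set (an assumption, written in TF's internal style) would be:
#   import numpy as np
#   from tensorflow.python.eager import backprop
#   from tensorflow.python.framework import constant_op
#   from tensorflow.python.framework import dtypes
#   from tensorflow.python.ops import array_ops
#   from tensorflow.python.ops import gradient_checker
#   from tensorflow.python.ops import gradients_impl
#   from tensorflow.python.ops import init_ops
#   from tensorflow.python.ops import math_ops
#   from tensorflow.python.ops import nn_ops
#   from tensorflow.python.ops import variable_scope as vs

# `_testElu` compares against `self._npElu`, a NumPy reference that is not
# shown here. A sketch reconstructed from the ELU definition
# (elu(x) = x for x >= 0, exp(x) - 1 for x < 0):
def _npElu(self, np_features):
  # Element-wise NumPy ELU used as ground truth for the TF op.
  return np.where(np_features < 0, np.exp(np_features) - 1, np_features)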
def testGradGrad(self):
  # Verifies the analytic second derivative of ELU at a few points.
  with self.test_session():
    x = array_ops.placeholder(dtype=dtypes.float32)
    elu = nn_ops.elu(x)
    g, = gradients_impl.gradients(elu, x)
    gg, = gradients_impl.gradients(g, x)
    for x_val in [-1, -0.5, 0.5, 1]:
      err = np.abs(gg.eval(feed_dict={x: x_val}) - _elu_grad_grad(x_val))
      self.assertLess(err, 1e-4)
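# `testGradGrad` checks against `_elu_grad_grad`, a module-level helper that
# is not shown here. A sketch reconstructed from the ELU definition:
# d2/dx2 elu(x) = exp(x) for x < 0 and 0 for x > 0 (undefined at 0, which is
# why the test points avoid it):
def _elu_grad_grad(activation):
  if activation < 0:
    return np.exp(activation)
  return 0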
def testGradientFloat64(self):
  with self.test_session():
    x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
    x = constant_op.constant(x_val, dtype=dtypes.float64, name="x")
    y = nn_ops.elu(x, name="elu")
    x_init = np.asarray(x_val, dtype=np.float64, order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], y, [2, 5], x_init_value=x_init)
  print("elu (float64) gradient err = ", err)
  self.assertLess(err, 1e-6)
def testGradGrad(self):
  # cached_session variant of the second-derivative check above.
  with self.cached_session():
    x = array_ops.placeholder(dtype=dtypes.float32)
    elu = nn_ops.elu(x)
    g, = gradients_impl.gradients(elu, x)
    gg, = gradients_impl.gradients(g, x)
    for x_val in [-1, -0.5, 0.5, 1]:
      err = np.abs(gg.eval(feed_dict={x: x_val}) - _elu_grad_grad(x_val))
      self.assertLess(err, 1e-4)
def testGradientFloat64(self):
  with self.cached_session():
    x_val = [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]]
    x = constant_op.constant(x_val, dtype=dtypes.float64, name="x")
    y = nn_ops.elu(x, name="elu")
    x_init = np.asarray(x_val, dtype=np.float64, order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], y, [2, 5], x_init_value=x_init)
  print("elu (float64) gradient err = ", err)
  self.assertLess(err, 1e-6)
def __call__(self, inputs, state, scope=None):
  # state = [h_(t-1), s_(t-1)]
  hidden_state = array_ops.slice(
      state, begin=(0, 0), size=(self._batch_size, self._hidden_size))
  context_state = array_ops.slice(
      state, begin=(0, self._hidden_size),
      size=(self._batch_size, self._context_size))
  with vs.variable_scope(scope or type(self).__name__):
    B = vs.get_variable(
        'B_matrix', shape=[self._input_size, self._context_size],
        initializer=glorot_initializer(
            in_size=self._input_size, out_size=self._context_size))
    A = vs.get_variable(
        'A_matrix', shape=[self._input_size, self._hidden_size],
        initializer=glorot_initializer(
            in_size=self._input_size, out_size=self._hidden_size))
    R = vs.get_variable(
        'R_matrix', shape=[self._hidden_size, self._hidden_size],
        initializer=glorot_initializer(
            in_size=self._hidden_size, out_size=self._hidden_size))
    P = vs.get_variable(
        'P_matrix', shape=[self._context_size, self._hidden_size],
        initializer=glorot_initializer(
            in_size=self._context_size, out_size=self._hidden_size))
    bias_term = vs.get_variable(
        'Bias', shape=[self._hidden_size],
        initializer=init_ops.constant_initializer(value=0.0))

    new_context = ((1.0 - self._alpha) * math_ops.matmul(inputs, B) +
                   self._alpha * context_state)  # TODO: math_ops.batch_matmul?
    # Alternative activations: math_ops.tanh, nn_ops.softsign
    new_hidden = nn_ops.elu(
        math_ops.matmul(new_context, P) + math_ops.matmul(inputs, A) +
        math_ops.matmul(hidden_state, R) + bias_term)
    new_state = array_ops.concat(values=[new_hidden, new_context],
                                 concat_dim=1)
  return new_state, new_state
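# The cell above references a `glorot_initializer` helper that is not shown.
# A minimal sketch (an assumption) following the standard Glorot/Xavier
# uniform scheme, with limit = sqrt(6 / (in_size + out_size)):
import math

def glorot_initializer(in_size, out_size):
  limit = math.sqrt(6.0 / (in_size + out_size))
  return init_ops.random_uniform_initializer(minval=-limit, maxval=limit)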
def testGradGradFloat32(self):
  with self.test_session():
    x = constant_op.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5], name="x")
    y = nn_ops.elu(x, name="elu")
    z = gradients_impl.gradients(y, x)
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32, order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], z[0], [2, 5], x_init_value=x_init)
  print("elu (float32) gradient of gradient err = ", err)
  self.assertLess(err, 1e-4)
def testGradGradFloat32(self):
  with self.cached_session():
    x = constant_op.constant(
        [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9],
        shape=[2, 5], name="x")
    y = nn_ops.elu(x, name="elu")
    z = gradients_impl.gradients(y, x)
    x_init = np.asarray(
        [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
        dtype=np.float32, order="F")
    err = gradient_checker.compute_gradient_error(
        x, [2, 5], z[0], [2, 5], x_init_value=x_init)
  print("elu (float32) gradient of gradient err = ", err)
  self.assertLess(err, 1e-4)
def _testElu(self, np_features):
  # Eager-mode variant: the op returns a concrete tensor, no session needed.
  np_elu = self._npElu(np_features)
  tf_elu = nn_ops.elu(np_features)
  self.assertAllClose(np_elu, tf_elu)
  self.assertShapeEqual(np_elu, tf_elu)
def biasd_dense_elu(x, y, z):
  dot = gen_composite_ops.my_biased_dense(x, y, z)
  return nn_ops.elu(dot)  # with known kernel, should not expand.
def f(x):
  # First derivative of ELU in double precision via autodiff.
  assert x.dtype == dtypes.float64
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = nn_ops.elu(x)
  return tape.gradient(y, x)
def f(x):
  # Second derivative of ELU: the inner gradient is computed inside the
  # persistent tape so it can itself be differentiated.
  with backprop.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = nn_ops.elu(x)
    dy = tape.gradient(y, x)
  return tape.gradient(dy, x)
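# An illustrative eager-mode call for either tape-based `f` above (the two
# definitions are separate snippets; the second would shadow the first in a
# single module). Input values are assumptions chosen to hit both ELU branches:
x = constant_op.constant([-1.0, 0.5], dtype=dtypes.float64)
grads = f(x)  # first variant: d(elu)/dx = [exp(-1), 1.0]
              # persistent-tape variant: d2(elu)/dx2 = [exp(-1), 0.0]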