def _value_and_gradients(fn, fn_arg_list, result=None, grads=None, name=None):
  """Helper to `maybe_call_fn_and_grads`.

  Evaluates `fn(*fn_arg_list)` when `result` is not supplied and computes the
  gradients of the result with respect to `fn_arg_list` when `grads` is not
  supplied.

  Args:
    fn: Python callable invoked as `fn(*fn_arg_list)`.
    fn_arg_list: A single argument or a list-like of arguments for `fn`. A
      value that is not list-like is wrapped in a one-element list. Entries
      that are not `None` are converted to `Tensor`s.
    result: Optional precomputed value of `fn(*fn_arg_list)`. When `None`, `fn`
      is called to obtain it.
    grads: Optional precomputed gradients. When not `None` they are converted
      to `Tensor`s and returned as-is; no gradient is computed.
    name: Optional name for the enclosing name scope (default:
      'value_and_gradients').

  Returns:
    result: The value of `fn`, with non-`None` entries converted to `Tensor`s.
    grads: The supplied `grads`, or newly computed gradients. When `result` is
      list-like with the same length as `fn_arg_list`, `grads[i]` is the
      gradient of `result[i]` with respect to `fn_arg_list[i]` only (the block
      diagonal of the Jacobian); otherwise it holds the gradient of `result`
      with respect to every entry of `fn_arg_list`.
  """
  with tf.name_scope(name, 'value_and_gradients',
                     [fn_arg_list, result, grads]):

    def _convert_to_tensor(x, name):
      # `None` entries are passed through untouched; everything else becomes a
      # `Tensor`. Lists are converted element-wise.
      ctt = lambda x_: x_ if x_ is None else tf.convert_to_tensor(x_, name=name)
      return [ctt(x_) for x_ in x] if is_list_like(x) else ctt(x)

    # Normalize the arguments to a list of tensors.
    fn_arg_list = (list(fn_arg_list) if is_list_like(fn_arg_list)
                   else [fn_arg_list])
    fn_arg_list = _convert_to_tensor(fn_arg_list, 'fn_arg')

    if result is None:
      result = fn(*fn_arg_list)
      if grads is None and tf.executing_eagerly():
        # Ensure we disable bijector cacheing in eager mode.
        # TODO(b/72831017): Remove this once bijector cacheing is fixed for
        # eager mode.
        # `0 + x` yields fresh tensor objects with the same values, which are
        # the ones differentiated against below.
        fn_arg_list = [0 + x for x in fn_arg_list]

    result = _convert_to_tensor(result, 'fn_result')

    if grads is not None:
      # Gradients were supplied by the caller; nothing to differentiate.
      grads = _convert_to_tensor(grads, 'fn_grad')
      return result, grads

    if tf.executing_eagerly():
      if is_list_like(result) and len(result) == len(fn_arg_list):
        # Compute the block diagonal of Jacobian.
        # TODO(b/79158574): Guard this calculation by an arg which explicitly
        # requests block diagonal Jacobian calculation.
        def make_fn_slice(i):
          """Needed to prevent `cell-var-from-loop` pylint warning."""
          return lambda *args: fn(*args)[i]
        # Note: each entry re-invokes `fn` through `gradients_function`.
        grads = [
            tfe.gradients_function(make_fn_slice(i))(*fn_arg_list)[i]
            for i in range(len(result))
        ]
      else:
        grads = tfe.gradients_function(fn)(*fn_arg_list)
    else:
      if is_list_like(result) and len(result) == len(fn_arg_list):
        # Compute the block diagonal of Jacobian.
        # TODO(b/79158574): Guard this calculation by an arg which explicitly
        # requests block diagonal Jacobian calculation.
        grads = [tf.gradients(result[i], fn_arg_list[i])[0]
                 for i in range(len(result))]
      else:
        grads = tf.gradients(result, fn_arg_list)

    return result, grads
def test_gradients_and_propagation_of_nan_in_x(self):
  """NaN entries of `x` reach `y` without corrupting the other gradients.

  Finite members of `x` must keep their expected gradient, and even the NaN
  member is expected to produce a finite (zero) gradient.
  """
  dtype = np.float32
  lower, upper = 0., 1.

  # Reference values of y = 2 * x on a regular 4-point grid over [0, 1].
  y_ref = 2 * np.linspace(lower, upper, 4, dtype=dtype)

  x_np = np.array([0., 0.1, np.nan, 0.4, 1.]).astype(dtype)
  x = tf.constant(x_np)

  with self.test_session():
    y = tfp.math.interp_regular_1d_grid(x, lower, upper, y_ref)
    self.assertAllClose(self.evaluate(y), 2 * x_np, atol=0, rtol=1e-6)
    if tf.executing_eagerly():
      # The graph-mode gradient check below does not apply in eager mode.
      return
    grad_np = tf.gradients(y, x)[0].eval()
    self.assertAllClose([2., 2., 0., 2., 2.], grad_np)
def compute_gradients(self, f, args, grad_ys=None):
  """Evaluates the gradients of `f` with respect to `args`.

  Uses the eager gradients function when executing eagerly and `tf.gradients`
  otherwise.

  Arguments:
    f: Function to be differentiated.
    args: List of `Tensor` arguments to be passed to the function `f`.
      Gradients are computed with respect to these arguments.
    grad_ys: Optional. A `Tensor` with the same shape as the `Tensor` returned
      by `f` that contains the incoming gradients with respect to the result
      of `f`.

  Returns:
    grads: List containing gradients of `f` with respect to `args`. It has the
      same length as `args`.
  """
  if not tf.executing_eagerly():
    symbolic_grads = tf.gradients(f(*args), args, grad_ys=grad_ys)
    return self.evaluate(symbolic_grads)

  # Only forward `dy` when incoming gradients were actually provided.
  dy_kwargs = {} if grad_ys is None else {"dy": grad_ys}
  eager_grads = tf.contrib.eager.gradients_function(f)(*args, **dy_kwargs)
  return self.evaluate(eager_grads)
def testNormalizations(self, conv_ctor, norm_ctor, norm_kwargs):
  """Checks that normalized outputs have roughly zero mean and unit std."""
  if tf.executing_eagerly():
    self.skipTest("Cannot test normalization correctness in Eager.")

  # The final layer is normalized but not activated: a final activation would
  # un-normalize the output.
  net = conv_ctor(
      output_channels=[16, 16],
      kernel_shapes=(3,),
      strides=(1,),
      paddings=("SAME",),
      normalization_ctor=norm_ctor,
      normalization_kwargs=norm_kwargs,
      normalize_final=True,
      activate_final=False)
  net_output = net(tf.random_uniform([16, 48, 64, 3]))

  with tf.train.SingularMonitoredSession() as session:
    output_np = session.run(net_output)

  # Fold every dimension that is normalized jointly onto axis 1.
  is_instance_norm = "axis" in norm_kwargs and norm_kwargs["axis"] == [1, 2]
  if is_instance_norm:
    # Instance normalization - combine spatial dimensions.
    folded_shape = [16, -1, 3]
  else:
    # Layer normalization - combine all non-batch dimensions.
    folded_shape = [16, -1]
  output_np = np.reshape(output_np, folded_shape)

  mean = np.mean(output_np, axis=1)
  std_dev = np.std(output_np, axis=1)

  # The tolerance is loose because the statistics are accumulated over large
  # images, which makes the normalization only approximate.
  self.assertAllClose(mean, np.zeros_like(mean), atol=2e-2)
  self.assertAllClose(std_dev, np.ones_like(std_dev), atol=2e-2)
def testDataFormat(self, module, data_format):
  """Checks output shapes (or the expected error) for each data format."""
  net = module(
      output_channels=self.output_channels,
      kernel_shapes=self.kernel_shapes,
      strides=self.strides,
      paddings=self.paddings,
      data_format=data_format)

  batch_size = 10
  height, width, in_channels = 100, 100, 3
  out_channels = self.output_channels[-1]

  if data_format == "NHWC":
    input_shape = [batch_size, height, width, in_channels]
    expected_output_shape = [batch_size, height, width, out_channels]
  else:
    input_shape = [batch_size, in_channels, height, width]
    expected_output_shape = [batch_size, out_channels, height, width]

  input_to_net = tf.random_normal(dtype=tf.float32, shape=input_shape)

  if not tf.executing_eagerly() or data_format != "NCHW":
    output = net(input_to_net)
    self.assertEqual(output.get_shape().as_list(), expected_output_shape)
    return

  # Eager + NCHW: connecting the network is expected to fail, with an error
  # type that depends on which network is under test.
  if module == snt.nets.ConvNet2D:
    expected_exception = tf.errors.UnimplementedError
  else:
    expected_exception = tf.errors.InvalidArgumentError
  with self.assertRaisesRegexp(expected_exception, "only supports NHWC"):
    net(input_to_net)
def testSampleWithSameSeed(self):
  """Samples drawn with one seed agree across Wishart parameterizations."""
  if tf.executing_eagerly():
    return

  def sample_one(dist):
    # Single draw with the seed shared by every distribution in this test.
    return self.evaluate(dist.sample(1, seed=42))

  def assert_positive_diagonal(dist):
    # The diagonal of each sampled Cholesky factor must be strictly positive.
    diagonal = tf.matrix_diag_part(dist.sample(1000, seed=42))
    np.testing.assert_array_less(0., self.evaluate(diagonal))

  scale = make_pd(1., 2)
  df = 4

  wishart_from_tril = tfd.Wishart(
      df, scale_tril=chol(scale), input_output_cholesky=False)
  full_sample = sample_one(wishart_from_tril)
  chol_sample = [chol(full_sample[0])]

  wishart_from_scale = tfd.Wishart(df, scale, input_output_cholesky=False)
  self.assertAllClose(full_sample, sample_one(wishart_from_scale))

  chol_wishart_from_tril = tfd.Wishart(
      df, scale_tril=chol(scale), input_output_cholesky=True)
  self.assertAllClose(chol_sample, sample_one(chol_wishart_from_tril))
  assert_positive_diagonal(chol_wishart_from_tril)

  chol_wishart_from_scale = tfd.Wishart(
      df, scale=scale, input_output_cholesky=True)
  self.assertAllClose(chol_sample, sample_one(chol_wishart_from_scale))
  assert_positive_diagonal(chol_wishart_from_scale)
def testEventShape(self):
  """Static shape inference when the event shapes come from `build_shapes`."""
  # Shape is always known for reshaping in eager mode, so we skip these tests.
  if tf.executing_eagerly():
    return

  event_shape_in, event_shape_out = self.build_shapes([2, 3], [6])
  bijector = tfb.Reshape(
      event_shape_out=event_shape_out,
      event_shape_in=event_shape_in,
      validate_args=True)

  # (input dims, expected output dims) pairs.
  forward_cases = (
      ([4, 2, 3], [4, None]),
      ([None, 2, 3], [None, None]),
  )
  for input_dims, expected_dims in forward_cases:
    forward_shape = bijector.forward_event_shape(tf.TensorShape(input_dims))
    self.assertEqual(forward_shape.as_list(), expected_dims)

  inverse_cases = (
      ([4, 6], [4, None, None]),
      ([None, 6], [None, None, None]),
  )
  for input_dims, expected_dims in inverse_cases:
    inverse_shape = bijector.inverse_event_shape(tf.TensorShape(input_dims))
    self.assertEqual(inverse_shape.as_list(), expected_dims)

  # If the input shape is totally unknown, there's nothing we can do!
  unknown_shape = bijector.forward_event_shape(tf.TensorShape(None))
  self.assertIsNone(unknown_shape.ndims)
def testEventShape(self):
  """Static shape inference when both event shapes are statically known."""
  bijector = tfb.Reshape(
      event_shape_out=tf.TensorShape([6]),
      event_shape_in=tf.TensorShape([2, 3]),
      validate_args=True)

  def forward_dims(dims):
    return bijector.forward_event_shape(tf.TensorShape(dims)).as_list()

  def inverse_dims(dims):
    return bijector.inverse_event_shape(tf.TensorShape(dims)).as_list()

  # With statically known event_shape_in/_out, forward_ and
  # inverse_event_shape must be correct even for partially specified inputs.
  self.assertEqual(forward_dims([4, 2, 3]), [4, 6])
  self.assertEqual(inverse_dims([4, 6]), [4, 2, 3])

  # Shape is always known for reshaping in eager mode, so we skip these tests.
  if tf.executing_eagerly():
    return

  self.assertEqual(forward_dims([None, 2, 3]), [None, 6])
  self.assertEqual(inverse_dims([None, 6]), [None, 2, 3])

  # If the input shape is totally unknown, there's nothing we can do!
  unknown_shape = bijector.forward_event_shape(tf.TensorShape(None))
  self.assertIsNone(unknown_shape.ndims)
def test_copy_layers(self):
  """Test copying layers."""
  tg = dc.models.TensorGraph()
  features = Feature(shape=(None, 10))
  dense = Dense(
      10, in_layers=features, biases_initializer=tf.random_normal_initializer)
  constant = Constant(10.0)
  output = dense + constant
  tg.add_output(output)
  tg.set_loss(output)
  tg.fit_generator([])

  # Copy the output layer, swapping the constant for a new one.
  replacements = {constant: Constant(20.0)}
  copied = output.copy(replacements, tg)

  # The copy must mirror the original layer structure.
  assert isinstance(copied, Add)
  assert isinstance(copied.in_layers[0], Dense)
  assert isinstance(copied.in_layers[0].in_layers[0], Feature)
  assert copied.in_layers[1] == replacements[constant]

  # The copied dense layer must carry the same variable values as the original.
  variables = tg.get_layer_variables(dense)
  with tg._get_tf("Graph").as_default():
    if not tf.executing_eagerly():
      values = tg.session.run(variables)
    else:
      values = [variable.numpy() for variable in variables]
  copied_values = copied.in_layers[0].variable_values
  for original_value, copied_value in zip(values, copied_values):
    assert np.array_equal(original_value, copied_value)
def testRegularizersInRegularizationLosses(self, transpose, use_bias):
  """Regularizers passed to the network show up as regularization losses."""
  if transpose:
    module = functools.partial(snt.nets.ConvNet2DTranspose,
                               output_shapes=[[100, 100]])
  else:
    module = snt.nets.ConvNet2D

  # Weights always get an L1 regularizer; biases get L2 when present.
  regularizers = {"w": tf.contrib.layers.l1_regularizer(scale=0.5)}
  if use_bias:
    regularizers["b"] = tf.contrib.layers.l2_regularizer(scale=0.5)

  model = module(output_channels=self.output_channels,
                 kernel_shapes=self.kernel_shapes,
                 strides=self.strides,
                 paddings=self.paddings,
                 use_bias=use_bias,
                 regularizers=regularizers)
  model(tf.random_normal(dtype=tf.float32, shape=(1, 100, 100, 3)))

  reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  losses_per_layer = 2 if use_bias else 1
  self.assertLen(reg_losses, 3 * losses_per_layer)

  if tf.executing_eagerly():
    return
  # Names are only checked when building a graph.
  self.assertRegexpMatches(reg_losses[0].name, ".*l1_regularizer.*")
  if use_bias:
    self.assertRegexpMatches(reg_losses[1].name, ".*l2_regularizer.*")
def _set_seed(seed):
  """Returns the op-level seed to use, setting the global seed under TFE.

  In graph mode `seed` is returned unchanged. In eager mode the seed is
  installed via `tf.set_random_seed` and `None` is returned instead.
  """
  # TODO(b/68017812): Deprecate once TFE supports seed.
  if not tf.executing_eagerly():
    return seed
  tf.set_random_seed(seed)
  return None
def testActivateBiasFlags(self, activate_final, use_bias, use_dropout):
  """Checks the final op type and variable count for each flag combination."""
  mlp = snt.nets.MLP(name=self.module_name,
                     output_sizes=self.output_sizes,
                     activate_final=activate_final,
                     use_bias=use_bias,
                     use_dropout=use_dropout)
  inputs = tf.random_normal(
      dtype=tf.float32, shape=[self.batch_size, self.input_size])
  net = mlp(inputs)

  if not tf.executing_eagerly():
    # The op that produces the output reveals what the last layer did.
    if activate_final:
      expected_op_type = "Relu"
    elif use_bias:
      expected_op_type = "Add"
    else:
      expected_op_type = "MatMul"
    self.assertEqual(net.op.type, expected_op_type)

  variables = mlp.get_variables()
  num_layers = len(self.output_sizes)
  # One weight per layer, plus one bias per layer when biases are enabled.
  expected_num_variables = num_layers * 2 if use_bias else num_layers
  self.assertEqual(len(variables), expected_num_variables)
def testInitialStateNames(self):
  """Initial-state tensors are named after the scope and the wrapped cores."""
  if tf.executing_eagerly():
    self.skipTest("Tensor.name is meaningless in eager mode.")

  hidden_size_a, hidden_size_b = 3, 4
  batch_size = 5
  deep_rnn = snt.DeepRNN(
      [snt.LSTM(hidden_size_a, name="a"), snt.LSTM(hidden_size_b, name="b")])

  def check_names(state, expected_names):
    # `expected_names` maps (core index, state index) to a tensor name.
    for (core_index, state_index), expected_name in expected_names:
      self.assertEqual(state[core_index][state_index].name, expected_name)

  deep_rnn_state = deep_rnn.initial_state(batch_size, trainable=True)
  check_names(deep_rnn_state, (
      ((0, 0), "deep_rnn_initial_state/a_initial_state/state_hidden_tiled:0"),
      ((0, 1), "deep_rnn_initial_state/a_initial_state/state_cell_tiled:0"),
      ((1, 0), "deep_rnn_initial_state/b_initial_state/state_hidden_tiled:0"),
      ((1, 1), "deep_rnn_initial_state/b_initial_state/state_cell_tiled:0"),
  ))

  other_start_state = deep_rnn.initial_state(
      batch_size, trainable=True, name="blah")
  check_names(other_start_state, (
      ((0, 0), "blah/a_initial_state/state_hidden_tiled:0"),
      ((0, 1), "blah/a_initial_state/state_cell_tiled:0"),
      ((1, 0), "blah/b_initial_state/state_hidden_tiled:0"),
      ((1, 1), "blah/b_initial_state/state_cell_tiled:0"),
  ))
def _check_same_graph(self):
  """Checks that the module is not being connected to multiple Graphs.

  An instance of a Sonnet module 'owns' the variables it contains, and permits
  seamless variable sharing. As such, connecting a single module instance to
  multiple Graphs is not possible - this function will raise an error should
  that occur.

  The first call records the current graph on the module and sets the module
  info.

  Raises:
    DifferentGraphError: if the module is connected to a different Graph than
      it was previously used in.
  """
  # `init_scope` is needed in case we are running inside a defun: we want to
  # know where the function will be called, not where it is being built.
  with ops.init_scope():
    graph_now = tf.get_default_graph()
    called_eagerly = tf.executing_eagerly()

  if self._graph is None:
    self._graph = graph_now
    self._set_module_info()

  # Same-graph checks only make sense when calling from graph mode (in eager
  # mode there is a single process level context where all modules are
  # created).
  if not called_eagerly and self._graph != graph_now:
    raise DifferentGraphError("Cannot connect module to multiple Graphs.")
def testCustomGetter(self):
  """Gradients vanish only while the stop-gradient custom getter is active."""
  custom_getter = snt.custom_getters.Context(snt.custom_getters.stop_gradient)
  module = snt.nets.ConvNet2D(output_channels=self.output_channels,
                              kernel_shapes=self.kernel_shapes,
                              rates=self.rates,
                              strides=self.strides,
                              paddings=self.paddings,
                              custom_getter=custom_getter)

  input_to_net = tf.random_normal(dtype=tf.float32, shape=[10, 100, 100, 3])

  if not tf.executing_eagerly():
    plain_out = module(input_to_net)
    with custom_getter:
      stopped_out = module(input_to_net)
    all_vars = tf.trainable_variables()
    plain_grads = tf.gradients(plain_out, all_vars)
    stopped_grads = tf.gradients(stopped_out, all_vars)
  else:
    with tf.GradientTape() as plain_tape:
      plain_out = module(input_to_net)
    with tf.GradientTape() as stopped_tape, custom_getter:
      stopped_out = module(input_to_net)
    all_vars = tf.trainable_variables()
    plain_grads = plain_tape.gradient(plain_out, all_vars)
    stopped_grads = stopped_tape.gradient(stopped_out, all_vars)

  # Without the getter context every variable receives a gradient ...
  for grad in plain_grads:
    self.assertNotEqual(None, grad)
  # ... and inside it none of them does.
  self.assertEqual([None] * len(stopped_grads), stopped_grads)
def testReprWorksCorrectlyMultivariate(self):
  """`repr` reports name, shapes and dtype for static and dynamic shapes."""
  expected_static_repr = (
      "<tfp.distributions.MultivariateNormalDiag"
      " 'MVN/'"
      " batch_shape=(2,)"
      " event_shape=(2,)"
      " dtype=float64>")
  mvn_static = tfd.MultivariateNormalDiag(loc=np.zeros([2, 2]), name="MVN")
  self.assertEqual(repr(mvn_static), expected_static_repr)

  # There's no notion of partially known shapes in eager mode, so exit
  # early.
  if tf.executing_eagerly():
    return

  partially_known_loc = tf.placeholder_with_default(
      input=np.ones((3, 3), dtype=np.float32), shape=[None, 3])
  mvn_dynamic = tfd.MultivariateNormalDiag(
      loc=partially_known_loc, name="MVN2")
  expected_dynamic_repr = (
      "<tfp.distributions.MultivariateNormalDiag"
      " 'MVN2/'"
      " batch_shape=(?,)"  # Partially known.
      " event_shape=(3,)"
      " dtype=float32>")
  self.assertEqual(repr(mvn_dynamic), expected_dynamic_repr)
def _call(self, *args, **kwargs):
  """Entry point when a module is called to connect it to the graph.

  This is the entry point when users connect a Module into the Graph. The
  underlying _build method will have been wrapped in a Template by the
  constructor, and we call this template with the provided inputs here.

  Note we use `_call` instead of `__call__` to allow instance level monkey
  patching (see `defun`).

  Args:
    *args: Arguments for underlying _build method.
    **kwargs: Keyword arguments for underlying _build method.

  Returns:
    The result of the underlying _build method.
  """
  self._check_init_called()
  self._check_same_graph()

  with self._capture_variables():
    build_result, scope_name = self._template(*args, **kwargs)
  self._is_connected = True

  if tf.executing_eagerly():
    # In eager mode the module is called a lot more frequently than in graph
    # mode (for each training step) and so we don't keep track of connected
    # subgraphs (since there will be orders of magnitude more of them).
    return build_result

  self._add_connected_subgraph(self._build, build_result, scope_name,
                               *args, **kwargs)
  return build_result
def testRegularizers(self, trainable, state_size):
  """Trainable initial states register one regularization loss per state."""
  batch_size = 6

  # `state_size` is assigned on the class itself because properties of
  # abstract classes cannot be set on an instance.
  snt.RNNCore.state_size = state_size
  flat_state_size = nest.flatten(state_size)
  core = snt.RNNCore(name="dummy_core")

  # The same L1 regularizer is used for every entry of the state structure.
  l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.5)
  trainable_regularizers = nest.pack_sequence_as(
      structure=state_size,
      flat_sequence=[l1_regularizer] * len(flat_state_size))

  core.initial_state(batch_size, dtype=tf.float32, trainable=trainable,
                     trainable_regularizers=trainable_regularizers)

  graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  if not trainable:
    self.assertFalse(graph_regularizers)
    return

  self.assertEqual(len(graph_regularizers), len(flat_state_size))
  if tf.executing_eagerly():
    return
  for regularization_loss in graph_regularizers:
    self.assertRegexpMatches(regularization_loss.name, ".*l1_regularizer.*")
def test_container_not_supported_in_eager(self):
  """Calling a reuse-wrapped method on a plain container fails in eager."""
  running_in_graph_mode = not tf.executing_eagerly()
  if running_in_graph_mode:
    self.skipTest("Skipping test in graph mode.")

  expected_message = ".* not supported in eager mode .*"
  variable_container = ReuseVarsTest.VariableContainer("name")
  with self.assertRaisesRegexp(ValueError, expected_message):
    variable_container.method_with_reuse()
def _value_and_gradient(fn, *args):
  """Calls `fn` and computes the gradient of the result wrt `args`.

  Args:
    fn: Python callable invoked as `fn(*args)`.
    *args: Positional arguments for `fn`; gradients are taken with respect to
      each of them.

  Returns:
    v: The value `fn(*args)`.
    g: The gradients of `v` with respect to `args`, one entry per argument.
  """
  if tf.executing_eagerly():
    # Unpack `args` so that `fn` sees the same call in eager mode as in graph
    # mode; passing the tuple itself would hand `fn` a single packed argument.
    v, g = tfe.value_and_gradients_function(fn)(*args)
  else:
    v = fn(*args)
    g = tf.gradients(v, args)
  return v, g
def grad_potential(self, position, check_numerics=True):
  """Get gradient of potential function at current location.

  Args:
    position: Location at which `self.potential` is differentiated.
    check_numerics: Accepted for interface compatibility; it is not used by
      this implementation.

  Returns:
    The gradient of `self.potential` with respect to `position`.
  """
  if not tf.executing_eagerly():
    potential_value = self.potential(position)
    return tf.gradients(potential_value, position)[0]

  potential_grad_fn = tfe.gradients_function(self.potential)
  return potential_grad_fn(position)[0]
def _convert_to_tensor(value, name=None, preferred_dtype=None):
  """Converts to tensor avoiding an eager bug that loses float precision.

  In eager mode, when an integer or bool `preferred_dtype` is requested, the
  value is first converted without any dtype preference; if that yields a
  floating tensor it is returned directly. In every other case the regular
  conversion with `preferred_dtype` is used.
  """
  # TODO(b/116672045): Remove this function.
  if tf.executing_eagerly() and preferred_dtype is not None:
    if preferred_dtype.is_integer or preferred_dtype.is_bool:
      unconstrained = tf.convert_to_tensor(value, name=name)
      if unconstrained.dtype.is_floating:
        return unconstrained
  return tf.convert_to_tensor(
      value, name=name, preferred_dtype=preferred_dtype)
def testGetVariable(self, use_resource):
  """`notify_about_variables` reports variables made by `tf.get_variable`."""
  if tf.executing_eagerly() and not use_resource:
    self.skipTest("Ref variables not supported in eager mode.")

  seen_variables = []
  notifier = util.notify_about_variables(seen_variables.append)
  with notifier, tf.variable_scope("", use_resource=use_resource):
    x = tf.get_variable("x", [])

  self.assertVariableType(x, use_resource)
  self.assertEqual(seen_variables, [x])
def test_reuse_vars_subgraph_recording(self):
  """Calling a reuse-wrapped method connects the module and logs a subgraph."""
  obj1 = ReuseVarsTest.ModuleReuse(shape=[3, 4], name="scope1")
  self.assertFalse(obj1.is_connected)

  obj1_a_outputs = obj1.a()
  self.assertTrue(obj1.is_connected)

  if tf.executing_eagerly():
    # The connected-subgraph checks below only apply in graph mode.
    return

  self.assertEqual(obj1.last_connected_subgraph.name_scope, "scope1/a/")
  self.assertIs(obj1.last_connected_subgraph.module, obj1)
  self.assertEqual(obj1.last_connected_subgraph.inputs, {})
  self.assertIs(obj1.last_connected_subgraph.outputs, obj1_a_outputs)
def run_with_static_graph():
  """Trains and tests an A2C agent inside a freshly created static graph."""
  static_graph = tf.Graph()
  with static_graph.as_default():
    print(tf.executing_eagerly())  # False

    agent = A2CAgent(Model(num_actions=env.action_space.n))

    # The reward history returned by `train` is not needed here.
    agent.train(env)
    print("Finished training, testing...")
    print("%d out of 200" % agent.test(env))  # 200 out of 200
def testCopy(self):
  """`copy` overrides the given parameters and keeps the remaining ones."""
  # 5 random index points in R^2
  index_points_1 = np.random.uniform(-4., 4., (5, 2)).astype(np.float32)
  # 10 random index points in R^2
  index_points_2 = np.random.uniform(-4., 4., (10, 2)).astype(np.float32)

  if not self.is_static:
    # Hide the static shapes to exercise the dynamic-shape code path.
    index_points_1 = tf.placeholder_with_default(index_points_1, shape=None)
    index_points_2 = tf.placeholder_with_default(index_points_2, shape=None)

  mean_fn = lambda x: np.array([0.], np.float32)
  kernel_1 = psd_kernels.ExponentiatedQuadratic()
  kernel_2 = psd_kernels.ExpSinSquared()

  tp1 = tfd.StudentTProcess(
      df=3.,
      kernel=kernel_1,
      index_points=index_points_1,
      mean_fn=mean_fn,
      jitter=1e-5)
  tp2 = tp1.copy(df=4., index_points=index_points_2, kernel=kernel_2)

  event_shape_1 = [5]
  event_shape_2 = [10]

  # `mean_fn` was not overridden, so it is shared; the kernels differ.
  self.assertEqual(tp1.mean_fn, tp2.mean_fn)
  self.assertIsInstance(tp1.kernel, psd_kernels.ExponentiatedQuadratic)
  self.assertIsInstance(tp2.kernel, psd_kernels.ExpSinSquared)

  if self.is_static or tf.executing_eagerly():
    self.assertAllEqual(tp1.batch_shape, tp2.batch_shape)
    self.assertAllEqual(tp1.event_shape, event_shape_1)
    self.assertAllEqual(tp2.event_shape, event_shape_2)
    self.assertEqual(self.evaluate(tp1.df), 3.)
    self.assertEqual(self.evaluate(tp2.df), 4.)
    self.assertAllEqual(tp1.index_points, index_points_1)
    self.assertAllEqual(tp2.index_points, index_points_2)
    self.assertAllEqual(
        tf.contrib.util.constant_value(tp1.jitter),
        tf.contrib.util.constant_value(tp2.jitter))
  else:
    self.assertAllEqual(
        self.evaluate(tp1.batch_shape_tensor()),
        self.evaluate(tp2.batch_shape_tensor()))
    self.assertAllEqual(
        self.evaluate(tp1.event_shape_tensor()), event_shape_1)
    self.assertAllEqual(
        self.evaluate(tp2.event_shape_tensor()), event_shape_2)
    self.assertEqual(self.evaluate(tp1.jitter), self.evaluate(tp2.jitter))
    self.assertEqual(self.evaluate(tp1.df), 3.)
    self.assertEqual(self.evaluate(tp2.df), 4.)
    self.assertAllEqual(self.evaluate(tp1.index_points), index_points_1)
    self.assertAllEqual(self.evaluate(tp2.index_points), index_points_2)
def testDynamicEventShape(self):
  """An unknown last dimension yields an unknown static event shape."""
  if tf.executing_eagerly():
    return

  def with_unknown_last_dim(array):
    # Keeps the two leading dimensions static and hides the last one.
    return tf.placeholder_with_default(input=array, shape=[2, 3, None])

  loc = np.float32(self._rng.rand(2, 3, 2))
  scale_diag = np.float32(self._rng.rand(2, 3, 2))
  mvn = tfd.MultivariateNormalDiag(
      loc=with_unknown_last_dim(loc),
      scale_diag=with_unknown_last_dim(scale_diag))

  self.assertListEqual(mvn.batch_shape.as_list(), [2, 3])
  self.assertListEqual(mvn.event_shape.as_list(), [None])
def testHalfNormalShapeWithPlaceholders(self):
  """Shapes of a HalfNormal whose `scale` has a statically unknown shape."""
  if tf.executing_eagerly():
    return

  unknown_shape_scale = tf.placeholder_with_default(
      input=[1., 2], shape=None)
  halfnorm = tfd.HalfNormal(scale=unknown_shape_scale)

  # The static batch shape is fully unknown, the static event shape is scalar.
  self.assertEqual(halfnorm.batch_shape, tf.TensorShape(None))
  self.assertEqual(halfnorm.event_shape, ())

  # The dynamic shapes are still available once evaluated.
  event_shape_value = self.evaluate(halfnorm.event_shape_tensor())
  self.assertAllEqual(event_shape_value, [])
  batch_shape_value = self.evaluate(halfnorm.batch_shape_tensor())
  self.assertAllEqual(batch_shape_value, [2])
def testInvalidPermException(self):
  """An invalid `perm` is rejected, statically or when the op is run."""
  msg = '`perm` must be a valid permutation vector.'

  if not (self.is_static or tf.executing_eagerly()):
    # Dynamic `perm`: the failure only surfaces when the op is evaluated.
    with self.assertRaisesOpError(msg):
      dynamic_perm = tf.placeholder_with_default([1, 2], shape=[2])
      bijector = tfb.Transpose(perm=dynamic_perm, validate_args=True)
      self.evaluate(bijector.forward([[0, 1]]))
    return

  # Static `perm`: the constructor itself must raise.
  with self.assertRaisesRegexp(ValueError, msg):
    tfb.Transpose(perm=[1, 2], validate_args=True)
def testDropout(self):
  """Connecting an MLP with dropout enabled adds a dropout op to the graph."""
  if tf.executing_eagerly():
    self.skipTest("Test not supported when executing eagerly")

  mlp_name = "test_dropout_on_mlp"
  mlp = snt.nets.MLP([1],
                     use_dropout=True,
                     use_bias=False,
                     activate_final=True,
                     name=mlp_name)
  mlp(tf.ones([1, 1]), is_training=True, dropout_keep_prob=0.5)

  expected_op_name = "{}_1/dropout/Shape".format(mlp_name)
  graph_operations = tf.get_default_graph().get_operations()
  self.assertIn(expected_op_name, [op.name for op in graph_operations])
def _intel_cpu_quantize_weight_eightbit(self,
                                        parent,
                                        input_node,
                                        per_channel,
                                        quantization_mode=b"SCALED"):
  """Replaces a float weight constant by a qint8 constant plus `Dequantize`.

  Four nodes are added to the output graph: the quantized qint8 constant, the
  min and max range constants, and a `Dequantize` node that takes the name of
  `input_node` and reads from the other three.

  Args:
    parent: Op type of the node the weight feeds into. "Conv2D" and "MatMul"
      share one code path, "DepthwiseConv2dNative" has its own.
      NOTE(review): no other value is handled here; `qint8_tensor` would be
      unbound for one - confirm callers never pass anything else.
    input_node: Constant node whose `value` attribute holds the float weights.
    per_channel: For "Conv2D"/"MatMul", whether ranges are computed per output
      channel (reduction over axes 0, 1, 2) instead of over the whole tensor.
    quantization_mode: `mode` passed to `quantize_v2` on the per-tensor path.
      The emitted `Dequantize` node always uses b"SCALED".
  """
  base_name = input_node.name + "_"
  qint8_const_name = base_name + "qint8_const"
  min_name = base_name + "min"
  max_name = base_name + "max"
  float_tensor = tensor_util.MakeNdarray(input_node.attr["value"].tensor)
  epsilon = 1e-4  # Needs to be set empirically if accuracy is not satisfactory
  if parent in ("Conv2D", "MatMul"):
    if per_channel:
      ranges = np.abs(float_tensor).max(axis=(0, 1, 2))
      min_value = -ranges
      max_value = ranges
      # nudging min-max values outside epsilon radius around zero
      ranges[ranges < epsilon] = epsilon
      min_value[np.abs(min_value) < epsilon] = -epsilon
      max_value[np.abs(max_value) < epsilon] = epsilon
      qint8_tensor = (float_tensor * 127.0 / ranges).astype(np.int8)
    else:
      min_value = np.min(float_tensor.flatten())
      max_value = np.max(float_tensor.flatten())
      # Same processing of min-max as in quantize_weight_eightbit
      # function.
      if min_value > 0.0:
        min_value = 0.0
      if min_value == max_value:
        if abs(min_value) < 0.000001:
          max_value = min_value + 1.0
        elif min_value > 0:
          max_value = 2 * min_value
        else:
          max_value = min_value / 2.0

      sess = tf.compat.v1.Session()
      try:
        with sess.as_default():
          quantize_op = array_ops.quantize_v2(
              float_tensor,
              min_value,
              max_value,
              dtypes.qint8,
              mode=quantization_mode,
              round_mode="HALF_TO_EVEN")
          qint8_tensor = quantize_op[0].numpy(
          ) if tf.executing_eagerly() else quantize_op[0].eval()
          # Updated min-max values should be passed to the next
          # feeding node.
          min_value = quantize_op[1].numpy() if tf.executing_eagerly(
          ) else quantize_op[1].eval()
          max_value = quantize_op[2].numpy() if tf.executing_eagerly(
          ) else quantize_op[2].eval()
      finally:
        # Release the session even when quantization or evaluation raises.
        sess.close()
  elif parent == "DepthwiseConv2dNative":
    # get the max values based on dim 0 and 1 for depthwise conv
    # since, the output channel will be dim 2 * dim 3
    ranges = np.abs(float_tensor).max(axis=(0, 1))
    ranges = ranges.flatten()
    min_value = -ranges
    max_value = ranges
    # nudging min-max values outside epsilon radius around zero
    ranges[ranges < epsilon] = epsilon
    min_value[np.abs(min_value) < epsilon] = -epsilon
    max_value[np.abs(max_value) < epsilon] = epsilon
    # Since output channel will be 1 dim which is dim 2 * dim 3
    # When divide by range, qint8_tensor needs to be 3 dim
    # where, 3rd dim should be same dim of ranges
    a, b, c, d = float_tensor.shape
    qint8_tensor = (float_tensor.reshape(a, b, c * d) * 127.0 /
                    ranges).astype(np.int8)
    # get the shape back to 4 dim
    qint8_tensor = qint8_tensor.reshape(a, b, c, d)

  shape = tensor_util.TensorShapeProtoToList(
      input_node.attr["value"].tensor.tensor_shape)
  qint8_const_node = helper.create_constant_node(qint8_const_name,
                                                 qint8_tensor,
                                                 dtypes.qint8,
                                                 shape=shape)
  min_node = helper.create_constant_node(min_name,
                                         min_value,
                                         dtypes.float32,
                                         device=self.device)
  max_node = helper.create_constant_node(max_name,
                                         max_value,
                                         dtypes.float32,
                                         device=self.device)
  # The Dequantize node takes over the original constant's name, so nodes that
  # referenced the float weights by name now read the dequantized values.
  dequantize_node = helper.create_node(
      "Dequantize", input_node.name,
      [qint8_const_name, min_name, max_name])
  helper.set_attr_dtype(dequantize_node, "T", dtypes.qint8)
  helper.set_attr_string(dequantize_node, "mode", b"SCALED")
  self.add_output_graph_node(qint8_const_node)
  self.add_output_graph_node(min_node)
  self.add_output_graph_node(max_node)
  self.add_output_graph_node(dequantize_node)
def has_eager_been_enabled():
  """Returns true iff in TF2 or in TF1 with eager execution enabled.

  The check is made inside `tf.init_scope()`, so the answer is taken from the
  scope `init_scope` switches to rather than from the current one.
  """
  with tf.init_scope():
    eager_enabled = tf.executing_eagerly()
  return eager_enabled
def create_eval_metrics(self, features, labels, estimator_spec, metric_fn):
  """Creates evaluation metrics from the given arguments.

  Three groups of metrics are registered with the eval-metrics store, in this
  order: the metrics carried by `estimator_spec`, a mean "loss" metric, and
  (when given) the metrics produced by the user's `metric_fn`.

  Args:
    features: Input `dict` of `Tensor` objects.
    labels: Labels `Tensor` or a dictionary of string label name to `Tensor`
      (for multi-head).
    estimator_spec: The `EstimatorSpec` created by a `Head` instance.
    metric_fn: A function which should obey the following signature:
    - Args: can only have following three arguments in any order:
        * predictions: Predictions `Tensor` or dict of `Tensor` created by
          given `Head`.
        * features: Input `dict` of `Tensor` objects created by `input_fn`
          which is given to `estimator.evaluate` as an argument.
        * labels: Labels `Tensor` or dict of `Tensor` (for multi-head) created
          by `input_fn` which is given to `estimator.evaluate` as an argument.
      - Returns: Dict of metric results keyed by name. Final metrics are a
        union of this and `estimator`s existing metrics. If there is a name
        conflict between this and `estimator`s existing metrics, this will
        override the existing one. The values of the dict are the results of
        calling a metric function, namely a `(metric_tensor, update_op)`
        tuple.
  """

  # TODO: Create CPU eval metrics non-lazily, similar to summaries.py.

  # If estimator_spec is not a TPUEstimatorSpec we create dummy metric_fn
  # and args.
  if isinstance(estimator_spec, tf.estimator.EstimatorSpec):
    spec_fn, spec_args = lambda: estimator_spec.eval_metric_ops, []
  else:
    spec_fn, spec_args = estimator_spec.eval_metrics
  self._eval_metrics_store.add_eval_metrics(
      self._templatize_metric_fn(spec_fn), spec_args)

  if tf_compat.version_greater_or_equal(
      "1.13.0") and tf.executing_eagerly():
    loss_metrics = tf.keras.metrics.Mean("mean_loss")
    loss_metrics(estimator_spec.loss)
  else:
    loss_metrics = tf_compat.v1.metrics.mean(estimator_spec.loss)

  def loss_fn(loss):
    # On TPU the metric is rebuilt from the loss that is passed in; otherwise
    # the metric created above is reused.
    if self._use_tpu:
      return {"loss": tf_compat.v1.metrics.mean(loss)}
    return {"loss": loss_metrics}

  loss_fn_args = [tf.reshape(estimator_spec.loss, [1])]
  self._eval_metrics_store.add_eval_metrics(
      self._templatize_metric_fn(loss_fn), loss_fn_args)

  # NOTE: the user supplied metrics_fn must be added last. This is because we
  # want user metrics to override AdaNet's metrics.
  if metric_fn:
    metric_fn_args = {}
    # `inspect.getargspec` was removed in Python 3.11 and rejects functions
    # with keyword-only arguments or annotations; prefer `getfullargspec`
    # and only fall back where it does not exist.
    getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
    argspec = getargspec(metric_fn).args
    if "features" in argspec:
      metric_fn_args["features"] = features
    if "labels" in argspec:
      metric_fn_args["labels"] = labels
    if "predictions" in argspec:
      metric_fn_args["predictions"] = estimator_spec.predictions
    additional_metrics = call_eval_metrics((metric_fn, metric_fn_args))

    def additional_metrics_fn(**kwargs):
      # On TPU the user metrics are recomputed from the arguments passed in;
      # otherwise the metrics computed above are reused.
      if self._use_tpu:
        return call_eval_metrics((metric_fn, kwargs))
      return additional_metrics

    self._eval_metrics_store.add_eval_metrics(
        self._templatize_metric_fn(additional_metrics_fn), metric_fn_args)
def __init__(self,
             learning_rate,
             preconditioner_decay_rate=0.95,
             data_size=1,
             burnin=25,
             diagonal_bias=1e-8,
             name=None,
             parallel_iterations=10,
             variable_scope=None):
  """Builds the optimizer's hyperparameter tensors and validation ops.

  Every numeric hyperparameter is converted to a `Tensor`; all of them except
  `learning_rate` are then wrapped with assertion dependencies so that the
  constraints listed below are checked whenever the tensor is used.

  Args:
    learning_rate: Converted to a `Tensor`; no assertion is attached to it.
    preconditioner_decay_rate: Must be non-negative and at most 1.
    data_size: Must be greater than zero.
    burnin: Must be a non-negative integer.
    diagonal_bias: Must be non-negative.
    name: Optional name for the name scope and the optimizer. Defaults to
      'StochasticGradientLangevinDynamics'.
    parallel_iterations: Stored as given on the instance.
    variable_scope: Optional variable scope in which the `counter` variable is
      created. When `None`, a new scope with a graph-unique name is opened.

  Raises:
    NotImplementedError: If eager execution is enabled.
  """
  default_name = 'StochasticGradientLangevinDynamics'
  with tf.name_scope(name, default_name, [
      learning_rate, preconditioner_decay_rate, data_size, burnin,
      diagonal_bias
  ]):
    if tf.executing_eagerly():
      raise NotImplementedError('Eager execution currently not supported for '
                                ' SGLD optimizer.')
    if variable_scope is None:
      # Derive a graph-unique scope name so that several optimizers can
      # coexist in one graph.
      var_scope_name = tf.get_default_graph().unique_name(
          name or default_name)
      with tf.variable_scope(var_scope_name) as scope:
        self._variable_scope = scope
    else:
      self._variable_scope = variable_scope

    self._preconditioner_decay_rate = tf.convert_to_tensor(
        preconditioner_decay_rate, name='preconditioner_decay_rate')
    self._data_size = tf.convert_to_tensor(
        data_size, name='data_size')
    self._burnin = tf.convert_to_tensor(burnin, name='burnin')
    self._diagonal_bias = tf.convert_to_tensor(
        diagonal_bias, name='diagonal_bias')
    self._learning_rate = tf.convert_to_tensor(
        learning_rate, name='learning_rate')
    self._parallel_iterations = parallel_iterations

    # Non-trainable integer counter, initialized to zero.
    with tf.variable_scope(self._variable_scope):
      self._counter = tf.get_variable(
          'counter', initializer=0, trainable=False)

    # Attach the validity assertions as control dependencies of each
    # hyperparameter tensor.
    self._preconditioner_decay_rate = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._preconditioner_decay_rate,
            message='`preconditioner_decay_rate` must be non-negative'),
        tf.assert_less_equal(
            self._preconditioner_decay_rate,
            1.,
            message='`preconditioner_decay_rate` must be at most 1.'),
    ], self._preconditioner_decay_rate)

    self._data_size = control_flow_ops.with_dependencies([
        tf.assert_greater(
            self._data_size,
            0,
            message='`data_size` must be greater than zero')
    ], self._data_size)

    self._burnin = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._burnin, message='`burnin` must be non-negative'),
        tf.assert_integer(
            self._burnin, message='`burnin` must be an integer')
    ], self._burnin)

    self._diagonal_bias = control_flow_ops.with_dependencies([
        tf.assert_non_negative(
            self._diagonal_bias,
            message='`diagonal_bias` must be non-negative')
    ], self._diagonal_bias)

    super(StochasticGradientLangevinDynamics, self).__init__(
        use_locking=False, name=name or default_name)
def _testMVN(self,
             base_distribution_class,
             base_distribution_kwargs,
             batch_shape=(),
             event_shape=(),
             not_implemented_message=None):
    """Checks a transformed distribution against `stats.multivariate_normal`.

    Two instances of `self._cls()` are built around an `Affine` bijector
    (`self._shift`, `self._tril`): one with Python (static) shape overrides
    and one whose overrides are fed through `placeholder_with_default`
    tensors with unknown static shape. Both are compared on sample moments,
    shapes, `log_prob`, `prob`, `mean` and `entropy`. The assertions expect
    an event shape of `[3]` and a batch shape of `[2]`.

    Args:
      base_distribution_class: Callable building the base distribution; it is
        invoked with `validate_args=True` plus `base_distribution_kwargs`.
      base_distribution_kwargs: Keyword arguments for the base distribution.
      batch_shape: Batch-shape override passed to the distribution under test.
      event_shape: Event-shape override passed to the distribution under test.
      not_implemented_message: Regex expected in the `NotImplementedError`
        raised by the CDF / survival-function family of methods.
    """
    # Overriding shapes must be compatible w/bijector; most bijectors are
    # batch_shape agnostic and only care about event_ndims.
    # In the case of `Affine`, if we got it wrong then it would fire an
    # exception due to incompatible dimensions.
    batch_shape_pl = tf.compat.v1.placeholder_with_default(
        input=np.int32(batch_shape), shape=None, name="dynamic_batch_shape")
    event_shape_pl = tf.compat.v1.placeholder_with_default(
        input=np.int32(event_shape), shape=None, name="dynamic_event_shape")
    fake_mvn_dynamic = self._cls()(
        distribution=base_distribution_class(
            validate_args=True, **base_distribution_kwargs),
        bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
        batch_shape=batch_shape_pl,
        event_shape=event_shape_pl,
        validate_args=True)
    fake_mvn_static = self._cls()(
        distribution=base_distribution_class(
            validate_args=True, **base_distribution_kwargs),
        bijector=tfb.Affine(shift=self._shift, scale_tril=self._tril),
        batch_shape=batch_shape,
        event_shape=event_shape,
        validate_args=True)

    # Reference mean/covariance computed with NumPy from the bijector's
    # shift and lower-triangular scale (cov = tril @ tril^T per batch member).
    actual_mean = np.tile(self._shift, [2, 1])  # Affine elided this tile.
    actual_cov = np.matmul(self._tril, np.transpose(self._tril, [0, 2, 1]))

    def actual_mvn_log_prob(x):
        # Evaluates the reference log-density one batch member at a time and
        # stacks the results so the batch index ends up last (via `.T`).
        return np.concatenate([[
            stats.multivariate_normal(actual_mean[i],
                                      actual_cov[i]).logpdf(x[:, i, :])
        ] for i in range(len(actual_cov))]).T

    actual_mvn_entropy = np.concatenate(
        [[stats.multivariate_normal(actual_mean[i], actual_cov[i]).entropy()]
         for i in range(len(actual_cov))])

    self.assertAllEqual([3], fake_mvn_static.event_shape)
    self.assertAllEqual([2], fake_mvn_static.batch_shape)

    # The dynamic instance has no static shape information; this is only
    # checked outside eager mode.
    if not tf.executing_eagerly():
        self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
        self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

    x = self.evaluate(fake_mvn_static.sample(5, seed=0))
    for unsupported_fn in (fake_mvn_static.log_cdf, fake_mvn_static.cdf,
                           fake_mvn_static.survival_function,
                           fake_mvn_static.log_survival_function):
        with self.assertRaisesRegexp(NotImplementedError,
                                     not_implemented_message):
            unsupported_fn(x)

    # Kept as a float because it is also the divisor of the covariance
    # estimate below; `sample` receives `int(num_samples)`.
    num_samples = 7e3
    for fake_mvn in [fake_mvn_static, fake_mvn_dynamic]:
        # Ensure sample works by checking first, second moments.
        y = fake_mvn.sample(int(num_samples), seed=0)
        x = y[0:5, ...]
        sample_mean = tf.reduce_mean(input_tensor=y, axis=0)
        centered_y = tf.transpose(a=y - sample_mean, perm=[1, 2, 0])
        sample_cov = tf.matmul(
            centered_y, centered_y, transpose_b=True) / num_samples
        # Everything is fetched in a single `evaluate` call so that `x_` and
        # the statistics computed from `x` come from the same evaluation.
        [
            sample_mean_,
            sample_cov_,
            x_,
            fake_event_shape_,
            fake_batch_shape_,
            fake_log_prob_,
            fake_prob_,
            fake_mean_,
            fake_entropy_,
        ] = self.evaluate([
            sample_mean,
            sample_cov,
            x,
            fake_mvn.event_shape_tensor(),
            fake_mvn.batch_shape_tensor(),
            fake_mvn.log_prob(x),
            fake_mvn.prob(x),
            fake_mvn.mean(),
            fake_mvn.entropy(),
        ])

        self.assertAllClose(actual_mean, sample_mean_, atol=0.1, rtol=0.1)
        self.assertAllClose(actual_cov, sample_cov_, atol=0., rtol=0.1)

        # Ensure all other functions work as intended.
        self.assertAllEqual([5, 2, 3], x_.shape)
        self.assertAllEqual([3], fake_event_shape_)
        self.assertAllEqual([2], fake_batch_shape_)
        self.assertAllClose(
            actual_mvn_log_prob(x_), fake_log_prob_, atol=0., rtol=1e-6)
        self.assertAllClose(
            np.exp(actual_mvn_log_prob(x_)), fake_prob_, atol=0., rtol=1e-5)
        self.assertAllClose(actual_mean, fake_mean_, atol=0., rtol=1e-6)
        self.assertAllClose(
            actual_mvn_entropy, fake_entropy_, atol=0., rtol=1e-6)
def static_value(self, t):
    """Returns `t.numpy()` under eager execution, and `None` otherwise."""
    return t.numpy() if tf.executing_eagerly() else None
def setUp(self):
    """Reset the default graph and Keras session, then force graph mode.

    The resulting eager-execution state is printed for reference.
    """
    tf.compat.v1.reset_default_graph()
    keras.backend.clear_session()
    tf.compat.v1.disable_eager_execution()
    eager_state = tf.executing_eagerly()
    print("Eager Execution:", eager_state)
def _initialize_params(
    self,
    model: KERAS_MODEL_TYPE,
    use_logits: bool,
    input_layer: int,
    output_layer: int,
) -> None:
    """
    Initialize most parameters of the classifier. This is a convenience function called by `__init__` and
    `__setstate__` to avoid code duplication.

    Besides recording the input/output tensors and shapes, this resolves the model's loss into a callable,
    creates a label placeholder matching that loss, and builds the loss-gradient function stored in
    `self._loss_gradients`.

    :param model: Keras model
    :param use_logits: True if the output of the model are logits.
    :param input_layer: Which layer to consider as the Input when the model has multiple input layers.
    :param output_layer: Which layer to consider as the Output when the model has multiple output layers.
    :raises ValueError: If TensorFlow is executing eagerly, or if the loss function is not recognised.
    :raises NotImplementedError: If the Keras backend is "cntk".
    """
    # Pick the Keras flavour (tf.keras vs. standalone keras) at call time.
    # pylint: disable=E0401
    if self.is_tensorflow:
        import tensorflow as tf  # lgtm [py/repeated-import]

        if tf.executing_eagerly():
            raise ValueError(
                "TensorFlow is executing eagerly. Please disable eager execution."
            )
        import tensorflow.keras as keras
        import tensorflow.keras.backend as k
    else:
        import keras  # lgtm [py/repeated-import]
        import keras.backend as k

    # Models exposing `inputs`/`outputs` lists are indexed by the requested
    # layer; otherwise the single `input`/`output` tensor is used and the
    # recorded layer index is 0.
    if hasattr(model, "inputs"):
        self._input_layer = input_layer
        self._input = model.inputs[input_layer]
    else:
        self._input = model.input
        self._input_layer = 0

    if hasattr(model, "outputs"):
        self._output = model.outputs[output_layer]
        self._output_layer = output_layer
    else:
        self._output = model.output
        self._output_layer = 0

    # The unpacking requires the output tensor to have exactly two dimensions.
    _, self._nb_classes = k.int_shape(self._output)
    self._input_shape = k.int_shape(self._input)[1:]
    logger.debug(
        "Inferred %i classes and %s as input shape for Keras classifier.",
        self.nb_classes,
        str(self.input_shape),
    )

    self._use_logits = use_logits

    # NOTE(review): the code below reads `self._model`, not the `model`
    # argument — presumably the caller has already stored it; confirm.
    # Get loss function
    if not hasattr(self._model, "loss"):
        logger.warning(
            "Keras model has no loss set. Classifier tries to use `k.sparse_categorical_crossentropy`."
        )
        loss_function = k.sparse_categorical_crossentropy
    else:
        if isinstance(self._model.loss, six.string_types):
            # Loss given by name: look it up on the backend module.
            loss_function = getattr(k, self._model.loss)

        elif "__name__" in dir(
                self._model.loss) and self._model.loss.__name__ in [
                    "categorical_hinge",
                    "categorical_crossentropy",
                    "sparse_categorical_crossentropy",
                    "binary_crossentropy",
                    "kullback_leibler_divergence",
                ]:
            # Known function-style losses: hinge and KL divergence are taken
            # from `keras.losses`, the cross-entropies from `keras.backend`.
            if self._model.loss.__name__ in [
                    "categorical_hinge",
                    "kullback_leibler_divergence",
            ]:
                loss_function = getattr(keras.losses,
                                        self._model.loss.__name__)
            else:
                loss_function = getattr(keras.backend,
                                        self._model.loss.__name__)

        elif isinstance(
                self._model.loss,
            (
                keras.losses.CategoricalHinge,
                keras.losses.CategoricalCrossentropy,
                keras.losses.SparseCategoricalCrossentropy,
                keras.losses.BinaryCrossentropy,
                keras.losses.KLDivergence,
            ),
        ):
            # Loss-class instances are used as they are.
            loss_function = self._model.loss
        else:
            loss_function = getattr(k, self._model.loss.__name__)

    # Check if loss function is an instance of loss function generator, the try is required because some of the
    # modules are not available in older Keras versions
    try:
        flag_is_instance = isinstance(
            loss_function,
            (
                keras.losses.CategoricalHinge,
                keras.losses.CategoricalCrossentropy,
                keras.losses.BinaryCrossentropy,
                keras.losses.KLDivergence,
            ),
        )
    except AttributeError:
        flag_is_instance = False

    # NOTE(review): unlike the guarded check above, the
    # `keras.losses.SparseCategoricalCrossentropy` lookup in the `elif` below
    # is not protected against AttributeError — confirm this is acceptable
    # for the supported Keras versions.
    # Check if the labels have to be reduced to index labels and create placeholder for labels
    if ("__name__" in dir(loss_function) and loss_function.__name__ in [
            "categorical_hinge",
            "categorical_crossentropy",
            "binary_crossentropy",
            "kullback_leibler_divergence",
    ]) or (self.is_tensorflow and flag_is_instance):
        self._reduce_labels = False
        label_ph = k.placeholder(shape=self._output.shape)
    elif ("__name__" in dir(loss_function) and loss_function.__name__
          in ["sparse_categorical_crossentropy"]) or isinstance(
              loss_function, keras.losses.SparseCategoricalCrossentropy):
        self._reduce_labels = True
        label_ph = k.placeholder(shape=[
            None,
        ])
    else:
        raise ValueError("Loss function not recognised.")

    # Define the loss using the loss function
    if "__name__" in dir(loss_function, ) and loss_function.__name__ in [
            "categorical_crossentropy",
            "sparse_categorical_crossentropy",
            "binary_crossentropy",
    ]:
        # Only the function-style cross-entropies receive `from_logits`.
        loss_ = loss_function(label_ph,
                              self._output,
                              from_logits=self._use_logits)

    elif "__name__" in dir(loss_function) and loss_function.__name__ in [
            "categorical_hinge",
            "kullback_leibler_divergence",
    ]:
        loss_ = loss_function(label_ph, self._output)

    elif isinstance(
            loss_function,
        (
            keras.losses.CategoricalHinge,
            keras.losses.CategoricalCrossentropy,
            keras.losses.SparseCategoricalCrossentropy,
            keras.losses.KLDivergence,
            keras.losses.BinaryCrossentropy,
        ),
    ):
        loss_ = loss_function(label_ph, self._output)

    # Define loss gradients
    loss_gradients = k.gradients(loss_, self._input)

    # With the TensorFlow backend the first element of the returned
    # gradients is used; the CNTK backend is rejected outright.
    if k.backend() == "tensorflow":
        loss_gradients = loss_gradients[0]
    elif k.backend() == "cntk":
        raise NotImplementedError(
            "Only TensorFlow is supported as backend for Keras.")

    # Set loss, gradients and prediction functions
    self._predictions_op = self._output
    self._loss = loss_
    self._loss_gradients = k.function([self._input, label_ph],
                                      [loss_gradients])

    # Get the internal layer
    self._layer_names = self._get_layers()
import os
import warnings
import tensorflow as tf
import time
import logging

# Quieten warnings and TensorFlow logging.
# NOTE(review): the original indentation was lost, so the extent of this
# `with` body is an assumption — confirm which statements belong inside it.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
logging.getLogger('tensorflow').setLevel(logging.FATAL)
# NOTE(review): this is set after `import tensorflow`; presumably it has to
# be set before the import to influence native logging — confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Name of the CSV file used for statistics (consumer not visible here).
stats_file = "tf_stats_data_parallel_mnist.csv"

# The return value is discarded, so this call only queries the eager state.
tf.executing_eagerly()

# Load the MNIST train/test splits through Keras.
(mnist_train_images, mnist_train_labels), (
    (mnist_test_images, mnist_test_labels)) = tf.keras.datasets.mnist.load_data()

sequential_mini_batch_size = 600
# picking constant mini-batch size
mini_batch_size = 25

print(mnist_train_images.shape, mnist_train_labels.shape,
      mnist_test_images.shape, mnist_test_labels.shape)

# Aliases of the full training arrays (no copy is made).
mnist_train_images_local = mnist_train_images
mnist_train_labels_local = mnist_train_labels
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np

# Section banner: the title padded with 50 asterisks on each side.
print(50 * '*' + ' Parte 2 ' + 50 * '*')

#tensorflow version being used
print('TF version:', tf.__version__)

#is tf executing eagerly?
print(tf.executing_eagerly())

#load mnist training and test data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

#we have 10 labels (0:Zero, 1:One, 2:Two, 3:Three, 4:Four, 5:Five, 6:Six, 7:Seven, 8:Eight, 9:Nine)
#each image is mapped to one single label. class names are not included in the dataset
# The list index is the numeric label, so labels[y] gives the class name.
labels = [
    'Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight',
    'Nine'
]

#data shape and cardinality
print('Train set shape', x_train.shape)
print('Train labels shape', y_train.shape)
print('Test set shape', x_test.shape)
print('Test labels shape', y_test.shape)
print('Number of training samples', len(x_train))
print('Number of testing samples', len(x_test))

#show a figure
def __init__(self, filename, columns=None, internal=True):
    """Build a dataset that reads the selected columns of a Parquet file.

    Column names, shapes and dtypes are queried from the file. In graph
    execution `columns` must be a dict mapping column names to a dtype or a
    spec object exposing `.dtype`; in eager execution it may be a dict, a
    sequence of names, or `None` (every column in the file is used).

    Raises:
      ValueError: In graph execution when `columns` is not a dict.
    """
    assert internal
    with tf.name_scope("ParquetIODataset"):
        file_names, file_shapes, file_dtypes = (
            core_ops.io_parquet_readable_info(
                filename, shared=filename, container="ParquetIODataset"
            )
        )

        def _entry_for(values, column):
            # First entry of `values` whose column name equals `column`.
            return tf.boolean_mask(
                values, tf.math.equal(file_names, column)
            )[0]

        def _shape_for(column):
            # Negative dimensions are dropped from the reported shape.
            raw_shape = _entry_for(file_shapes, column)
            return tf.boolean_mask(
                raw_shape, tf.math.greater_equal(raw_shape, 0)
            )

        def _dtype_for(column):
            return tf.as_dtype(_entry_for(file_dtypes, column).numpy())

        if tf.executing_eagerly():
            if columns is None:
                # No selection given: expose every column found in the file.
                shapes = tf.unstack(file_shapes)
                dtypes = [
                    tf.as_dtype(entry.numpy())
                    for entry in tf.unstack(file_dtypes)
                ]
                components = [
                    entry.numpy() for entry in tf.unstack(file_names)
                ]
            else:
                shapes = [_shape_for(column) for column in columns]
                dtypes = [_dtype_for(column) for column in columns]
                if isinstance(columns, dict):
                    components = list(columns.keys())
                else:
                    components = columns
            column_names = components
        else:
            # dtypes cannot be read back with `.numpy()` in graph mode, so
            # the caller has to supply them.
            if not isinstance(columns, dict):
                raise ValueError(
                    "The `columns` parameter can only be "
                    "a dictionary in graph execution, mapping "
                    "feature names to `tf.TensorSpec`."
                )
            shapes = [_shape_for(column) for column in columns]
            dtypes = [
                spec if isinstance(spec, tf.dtypes.DType) else spec.dtype
                for spec in columns.values()
            ]
            components = [
                _entry_for(file_names, column) for column in columns
            ]
            column_names = list(columns.keys())

        self._filename = filename
        self._components = components
        self._shapes = shapes
        self._dtypes = dtypes

        def _column_dataset(component, shape, dtype):
            # Read the column in [start, stop) ranges of at most `chunk`
            # rows; the final stop is the column length `shape[0]`.
            chunk = 4096
            starts = tf.data.Dataset.range(0, shape[0], chunk)
            later_starts = starts.skip(1)
            final_stop = tf.data.Dataset.from_tensor_slices(
                tf.convert_to_tensor([shape[0]], tf.int64)
            )
            stops = later_starts.concatenate(final_stop)

            def _read_range(start, stop):
                return core_ops.io_parquet_readable_read(
                    input=self._filename,
                    shared=self._filename,
                    component=component,
                    shape=shape,
                    start=start,
                    stop=stop,
                    dtype=dtype,
                    container="ParquetIODataset",
                )

            ranges = tf.data.Dataset.zip((starts, stops))
            return ranges.map(_read_range).unbatch()

        datasets = [
            _column_dataset(*entry)
            for entry in zip(components, shapes, dtypes)
        ]
        self._dataset = tf.data.Dataset.zip(
            collections.OrderedDict(zip(column_names, datasets))
        )

        # Use the caller's specs as `element_spec` only when every value in
        # `columns` is a `tf.TensorSpec`.
        specs_given = isinstance(columns, dict) and all(
            isinstance(val, tf.TensorSpec) for val in columns.values()
        )
        if specs_given:
            self._element_spec = collections.OrderedDict(columns)
        else:
            self._element_spec = None

        super().__init__(
            self._dataset._variant_tensor  # pylint: disable=protected-access
        )
def run_customized_training_loop(
    # pylint: disable=invalid-name
    _sentinel=None,
    # pylint: enable=invalid-name
    strategy=None,
    model_fn=None,
    loss_fn=None,
    scale_loss=True,
    model_dir=None,
    train_input_fn=None,
    steps_per_epoch=None,
    steps_per_loop=None,
    epochs=1,
    eval_input_fn=None,
    eval_steps=None,
    metric_fn=None,
    init_checkpoint=None,
    custom_callbacks=None,
    run_eagerly=False,
    sub_model_export_name=None,
    explicit_allreduce=False,
    pre_allreduce_callbacks=None,
    post_allreduce_callbacks=None,
    train_summary_interval=0):
    """Run BERT pretrain model training using low-level API.

    Arguments:
        _sentinel: Used to prevent positional parameters. Internal, do not use.
        strategy: Distribution strategy on which to run low level training loop.
        model_fn: Function that returns a tuple (model, sub_model). Caller of this
          function should add optimizer to the `model` via calling
          `model.compile()` API or manually setting `model.optimizer` attribute.
          Second element of the returned tuple(sub_model) is an optional sub model
          to be used for initial checkpoint -- if provided.
        loss_fn: Function with signature func(labels, logits) and returns a loss
          tensor.
        scale_loss: Whether to divide the raw loss by number of replicas before
          gradients calculation.
        model_dir: Model directory used during training for restoring/saving model
          weights.
        train_input_fn: Function that returns a tf.data.Dataset used for training.
        steps_per_epoch: Number of steps to run per epoch. At the end of each
          epoch, model checkpoint will be saved and evaluation will be conducted
          if evaluation dataset is provided.
        steps_per_loop: Number of steps per graph-mode loop. In order to reduce
          communication in eager context, training logs are printed every
          steps_per_loop.
        epochs: Number of epochs to train.
        eval_input_fn: Function that returns evaluation dataset. If none,
          evaluation is skipped.
        eval_steps: Number of steps to run evaluation. Required if `eval_input_fn`
          is not none.
        metric_fn: A metrics function that returns a Keras Metric object to record
          evaluation result using evaluation dataset or with training dataset
          after every epoch.
        init_checkpoint: Optional checkpoint to load to `sub_model` returned by
          `model_fn`.
        custom_callbacks: A list of Keras Callbacks objects to run during
          training. More specifically, `on_batch_begin()`, `on_batch_end()`,
          `on_epoch_begin()`, `on_epoch_end()` methods are invoked during
          training. Note that some metrics may be missing from `logs`.
        run_eagerly: Whether to run model training in pure eager execution. This
          should be disabled for TPUStrategy.
        sub_model_export_name: If not None, will export `sub_model` returned by
          `model_fn` into checkpoint files. The name of intermediate checkpoint
          file is {sub_model_export_name}_step_{step}.ckpt and the last
          checkpoint's name is {sub_model_export_name}.ckpt;
          if None, `sub_model` will not be exported as checkpoint.
        explicit_allreduce: Whether to explicitly perform gradient allreduce,
          instead of relying on implicit allreduce in optimizer.apply_gradients().
          default is False. For now, if training using FP16 mixed precision,
          explicit allreduce will aggregate gradients in FP16 format. For TPU and
          GPU training using FP32, explicit allreduce will aggregate gradients in
          FP32 format.
        pre_allreduce_callbacks: A list of callback functions that takes gradients
          and model variables pairs as input, manipulate them, and returns a new
          gradients and model variables pairs. The callback functions will be
          invoked in the list order and before gradients are allreduced.
          With mixed precision training, the pre_allreduce_callbacks will be
          applied on scaled_gradients. Default is no callbacks.
          Only used when explicit_allreduce=True.
        post_allreduce_callbacks: A list of callback functions that takes
          gradients and model variables pairs as input, manipulate them, and
          returns a new gradients and model variables pairs. The callback
          functions will be invoked in the list order and right before gradients
          are applied to variables for updates. Default is no callbacks. Only used
          when explicit_allreduce=True.
        train_summary_interval: Step interval for training summaries. If the value
          is a negative number, then training summaries are not enabled.

    Returns:
        Trained model.

    Raises:
        ValueError: (1) When model returned by `model_fn` does not have optimizer
          attribute or when required parameters are set to none. (2) eval args are
          not specified correctly. (3) metric_fn must be a callable if specified.
          (4) sub_model_export_name is specified, but `sub_model` returned
          by `model_fn` is None. (5) `run_eagerly` is set together with a
          TPUStrategy. (6) the function is called with positional arguments.
    """

    # Any positional argument lands in `_sentinel`, which is how positional
    # calls are detected and rejected.
    if _sentinel is not None:
        raise ValueError('only call `run_customized_training_loop()` '
                         'with named arguments.')

    required_arguments = [
        strategy, model_fn, loss_fn, model_dir, steps_per_epoch, train_input_fn
    ]
    if [arg for arg in required_arguments if arg is None]:
        raise ValueError('`strategy`, `model_fn`, `loss_fn`, `model_dir`, '
                         '`steps_per_epoch` and `train_input_fn` are required '
                         'parameters.')
    if not steps_per_loop:
        if tf.config.list_logical_devices('TPU'):
            # One can't fully utilize a TPU with steps_per_loop=1, so in this case
            # default users to a more useful value.
            steps_per_loop = min(1000, steps_per_epoch)
        else:
            steps_per_loop = 1
        logging.info('steps_per_loop not specified. Using steps_per_loop=%d',
                     steps_per_loop)
    if steps_per_loop > steps_per_epoch:
        logging.warning(
            'steps_per_loop: %d is specified to be greater than '
            ' steps_per_epoch: %d, we will use steps_per_epoch as'
            ' steps_per_loop.', steps_per_loop, steps_per_epoch)
        steps_per_loop = steps_per_epoch
    # The loop below calls `.numpy()` on tensors, which needs eager execution.
    assert tf.executing_eagerly()

    if run_eagerly:
        if isinstance(strategy, tf.distribute.experimental.TPUStrategy):
            raise ValueError(
                'TPUStrategy should not run eagerly as it heavily relies on graph'
                ' optimization for the distributed system.')

    if eval_input_fn and (eval_steps is None or metric_fn is None):
        raise ValueError(
            '`eval_step` and `metric_fn` are required when `eval_input_fn ` '
            'is not none.')
    if metric_fn and not callable(metric_fn):
        raise ValueError(
            'if `metric_fn` is specified, metric_fn must be a callable.')

    callback_list = tf.keras.callbacks.CallbackList(custom_callbacks)

    total_training_steps = steps_per_epoch * epochs
    train_iterator = _get_input_iterator(train_input_fn, strategy)
    # NOTE(review): the evaluation loss metric is created with the name
    # 'training_loss', the same as `train_loss_metric` below — confirm
    # whether an evaluation-specific name was intended.
    eval_loss_metric = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)

    with distribution_utils.get_strategy_scope(strategy):
        # To correctly place the model weights on accelerators,
        # model and optimizer should be created in scope.
        model, sub_model = model_fn()
        if not hasattr(model, 'optimizer'):
            raise ValueError('User should set optimizer attribute to model '
                             'inside `model_fn`.')
        if sub_model_export_name and sub_model is None:
            raise ValueError('sub_model_export_name is specified as %s, but '
                             'sub_model is None.' % sub_model_export_name)

        optimizer = model.optimizer

        if init_checkpoint:
            logging.info(
                'Checkpoint file %s found and restoring from '
                'initial checkpoint for core model.', init_checkpoint)
            checkpoint = tf.train.Checkpoint(model=sub_model)
            checkpoint.restore(
                init_checkpoint).assert_existing_objects_matched()
            logging.info('Loading from checkpoint file completed')

        train_loss_metric = tf.keras.metrics.Mean('training_loss',
                                                  dtype=tf.float32)
        eval_metrics = [metric_fn()] if metric_fn else []
        # If evaluation is required, make a copy of metric as it will be used by
        # both train and evaluation.
        train_metrics = [
            metric.__class__.from_config(metric.get_config())
            for metric in eval_metrics
        ]

        # Create summary writers
        if _should_export_summary(strategy):
            summary_dir = os.path.join(model_dir, 'summaries')
        else:
            # In multi worker training we need every worker to write summary, because
            # variables can trigger synchronization on read and synchronization needs
            # all workers to participate.
            summary_dir = tempfile.mkdtemp()
        eval_summary_writer = tf.summary.create_file_writer(
            os.path.join(summary_dir, 'eval'))
        last_summary_step = 0
        if steps_per_loop >= _MIN_SUMMARY_STEPS and train_summary_interval >= 0:
            # Only writes summary when the stats are collected sufficiently over
            # enough steps.
            train_summary_writer = tf.summary.create_file_writer(
                os.path.join(summary_dir, 'train'))
        else:
            train_summary_writer = tf.summary.create_noop_writer()

        # Collects training variables.
        training_vars = model.trainable_variables

        def _replicated_step(inputs):
            """Replicated training step."""

            inputs, labels = inputs
            with tf.GradientTape() as tape:
                model_outputs = model(inputs, training=True)
                loss = loss_fn(labels, model_outputs)
                # Raw loss is used for reporting in metrics/logs.
                raw_loss = loss
                if scale_loss:
                    # Scales down the loss for gradients to be invariant from replicas.
                    loss = loss / strategy.num_replicas_in_sync

            if explicit_allreduce:
                grad_utils.minimize_using_explicit_allreduce(
                    tape, optimizer, loss, training_vars,
                    pre_allreduce_callbacks, post_allreduce_callbacks)
            else:
                if isinstance(
                        optimizer,
                        tf.keras.mixed_precision.experimental.LossScaleOptimizer):
                    # The tape is re-entered so that the loss scaling itself
                    # is recorded before gradients are taken.
                    with tape:
                        scaled_loss = optimizer.get_scaled_loss(loss)
                    scaled_grads = tape.gradient(scaled_loss, training_vars)
                    grads = optimizer.get_unscaled_gradients(scaled_grads)
                else:
                    grads = tape.gradient(loss, training_vars)
                optimizer.apply_gradients(zip(grads, training_vars))
            # For reporting, the metric takes the mean of losses.
            train_loss_metric.update_state(raw_loss)
            for metric in train_metrics:
                metric.update_state(labels, model_outputs)

        @tf.function
        def train_steps(iterator, steps):
            """Performs distributed training steps in a loop.

            Args:
              iterator: the distributed iterator of training datasets.
              steps: a tf.int32 integer tensor to specify number of steps to run
                inside host training loop.

            Raises:
              ValueError: Any of the arguments or tensor shapes are invalid.
            """
            if not isinstance(steps, tf.Tensor):
                raise ValueError(
                    'steps should be an Tensor. Python object may cause '
                    'retracing.')

            for _ in tf.range(steps):
                strategy.run(_replicated_step, args=(next(iterator), ))

        def train_single_step(iterator):
            """Performs a distributed training step.

            Args:
              iterator: the distributed iterator of training datasets.

            Raises:
              ValueError: Any of the arguments or tensor shapes are invalid.
            """
            strategy.run(_replicated_step, args=(next(iterator), ))

        def test_step(iterator):
            """Calculates evaluation metrics on distributed devices."""

            def _test_step_fn(inputs):
                """Replicated accuracy calculation."""

                inputs, labels = inputs
                model_outputs = model(inputs, training=False)
                for metric in eval_metrics:
                    metric.update_state(labels, model_outputs)
                return model_outputs, labels

            outputs, labels = strategy.run(_test_step_fn,
                                           args=(next(iterator), ))
            # Convert per-replica results into per-replica local values so
            # the caller can iterate over them.
            outputs = tf.nest.map_structure(
                strategy.experimental_local_results, outputs)
            labels = tf.nest.map_structure(strategy.experimental_local_results,
                                           labels)
            return outputs, labels

        # Unless pure eager execution was requested, compile the single-step
        # functions with `tf.function`.
        if not run_eagerly:
            train_single_step = tf.function(train_single_step)
            test_step = tf.function(test_step)

        def _run_evaluation(current_training_step, test_iterator):
            """Runs validation steps and aggregate metrics.

            Args:
              current_training_step: tf.int32 tensor containing the current step.
              test_iterator: distributed iterator of test datasets.

            Returns:
              A dict of metric names and values.
            """

            # The last batch of the evaluation is often smaller than previous ones.
            # Moreover, in some distributed pieces it might even be empty. Therefore,
            # different from the way training_loss is calculated, it is needed to
            # gather all the logits and labels here to calculate the evaluation loss
            # outside.
            loss_list, loss_weights = list(), list()
            for _ in range(eval_steps):
                outputs, labels = test_step(test_iterator)
                for cur_logits, cur_labels in zip(outputs, labels):
                    # This is to handle cases when cur_labels is not a single tensor,
                    # but a dict of tensors.
                    cur_weight = tf.shape(tf.nest.flatten(cur_labels)[0])[0]
                    # Empty per-replica batches are skipped.
                    if cur_weight != 0:
                        loss_list.append(
                            loss_fn(cur_labels, cur_logits).numpy())
                        loss_weights.append(cur_weight)
            # The sample_weights are the actual number of examples in each batch,
            # a summation of numbers of examples in each replica if using
            # distributed training.
            eval_loss_metric.update_state(loss_list,
                                          sample_weight=loss_weights)

            logs = {}
            with eval_summary_writer.as_default():
                for metric in [eval_loss_metric
                               ] + eval_metrics + model.metrics:
                    metric_value = _float_metric_value(metric)
                    logs[metric.name] = metric_value
                    logging.info('Step: [%d] Validation %s = %f',
                                 current_training_step, metric.name,
                                 metric_value)
                    tf.summary.scalar(metric.name,
                                      metric_value,
                                      step=current_training_step)
                eval_summary_writer.flush()

            return logs

        # Training loop starts here.
        checkpoint = tf.train.Checkpoint(model=model,
                                         optimizer=optimizer,
                                         global_step=optimizer.iterations)
        sub_model_checkpoint = tf.train.Checkpoint(
            model=sub_model, global_step=optimizer.iterations
        ) if sub_model_export_name else None

        # Resume from the newest checkpoint in `model_dir`, if there is one.
        latest_checkpoint_file = tf.train.latest_checkpoint(model_dir)
        if latest_checkpoint_file:
            logging.info(
                'Checkpoint file %s found and restoring from '
                'checkpoint', latest_checkpoint_file)
            checkpoint.restore(latest_checkpoint_file)
            logging.info('Loading from checkpoint file completed')

        # The step counter is taken from the optimizer's iteration count.
        current_step = optimizer.iterations.numpy()
        checkpoint_name = 'ctl_step_{step}.ckpt'

        while current_step < total_training_steps:
            if current_step % steps_per_epoch == 0:
                callback_list.on_epoch_begin(
                    int(current_step / steps_per_epoch) + 1)

            # Training loss/metric are taking average over steps inside micro
            # training loop. We reset the their values before each round.
            train_loss_metric.reset_states()
            for metric in train_metrics + model.metrics:
                metric.reset_states()

            callback_list.on_batch_begin(current_step)
            # Runs several steps in the host while loop.
            steps = steps_to_run(current_step, steps_per_epoch, steps_per_loop)

            if tf.config.list_physical_devices('GPU'):
                # TODO(zongweiz): merge with train_steps once tf.while_loop
                # GPU performance bugs are fixed.
                for _ in range(steps):
                    train_single_step(train_iterator)
            else:
                # Converts steps to a Tensor to avoid tf.function retracing.
                train_steps(train_iterator,
                            tf.convert_to_tensor(steps, dtype=tf.int32))
            train_loss = _float_metric_value(train_loss_metric)
            current_step += steps
            callback_list.on_batch_end(current_step - 1, {'loss': train_loss})

            # Updates training logging.
            training_status = 'Train Step: %d/%d / loss = %s' % (
                current_step, total_training_steps, train_loss)

            # Write real summaries only once the configured interval has
            # elapsed; otherwise route them to a no-op writer.
            if current_step >= last_summary_step + train_summary_interval:
                summary_writer = train_summary_writer
                last_summary_step = current_step
            else:
                summary_writer = tf.summary.create_noop_writer()

            with summary_writer.as_default():
                tf.summary.scalar(train_loss_metric.name,
                                  train_loss,
                                  step=current_step)
                for metric in train_metrics + model.metrics:
                    metric_value = _float_metric_value(metric)
                    training_status += ' %s = %f' % (metric.name, metric_value)
                    tf.summary.scalar(metric.name,
                                      metric_value,
                                      step=current_step)
                summary_writer.flush()
            logging.info(training_status)

            if current_step % steps_per_epoch == 0:
                # Save a submodel with the step in the file name after each epoch.
                if sub_model_export_name:
                    _save_checkpoint(
                        strategy, sub_model_checkpoint, model_dir,
                        '%s_step_%d.ckpt' %
                        (sub_model_export_name, current_step))

                # Save model checkpoints and run validation steps after each epoch
                # (with the exception of the final epoch which is handled after the
                # training loop).
                if current_step < total_training_steps:
                    _save_checkpoint(strategy, checkpoint, model_dir,
                                     checkpoint_name.format(step=current_step))
                    logs = None
                    if eval_input_fn:
                        logging.info('Running evaluation after step: %s.',
                                     current_step)
                        logs = _run_evaluation(
                            current_step,
                            _get_input_iterator(eval_input_fn, strategy))
                        # Re-initialize evaluation metric.
                        eval_loss_metric.reset_states()
                        for metric in eval_metrics + model.metrics:
                            metric.reset_states()

                    callback_list.on_epoch_end(
                        int(current_step / steps_per_epoch), logs)

        # Final export, checkpoint and evaluation once training is complete.
        if sub_model_export_name:
            _save_checkpoint(strategy, sub_model_checkpoint, model_dir,
                             '%s.ckpt' % sub_model_export_name)

        _save_checkpoint(strategy, checkpoint, model_dir,
                         checkpoint_name.format(step=current_step))
        logs = None
        if eval_input_fn:
            logging.info(
                'Running final evaluation after training is complete.')
            logs = _run_evaluation(
                current_step, _get_input_iterator(eval_input_fn, strategy))

        callback_list.on_epoch_end(int(current_step / steps_per_epoch), logs)

        training_summary = {
            'total_training_steps': total_training_steps,
            'train_loss': _float_metric_value(train_loss_metric),
        }
        for metric in model.metrics:
            training_summary[metric.name] = _float_metric_value(metric)
        if eval_metrics:
            # TODO(hongkuny): Cleans up summary reporting in text.
            training_summary['last_train_metrics'] = _float_metric_value(
                train_metrics[0])
            training_summary['eval_metrics'] = _float_metric_value(
                eval_metrics[0])

        write_txt_summary(training_summary, summary_dir)

        # When summaries are not exported, `summary_dir` is the temporary
        # directory created above, so it is removed here.
        if not _should_export_summary(strategy):
            tf.io.gfile.rmtree(summary_dir)

        return model
#%% import tensorflow as tf from tensorflow import keras import numpy as np import matplotlib.pyplot as plt print(tf.__version__) print("즉시 실행 모드: ", tf.executing_eagerly()) print("GPU ", "사용 가능" if tf.config.experimental.list_physical_devices("GPU") else "사용 불가능") # keras.datasets.mnist #%%load data fashion_mnist = keras.datasets.fashion_mnist (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() # print(train_images[0],train_labels[0]) print(train_images.shape)
def train(self, check_prior, **name_to_observation_or_conditioning):
    """Build the training loss, summing over all values of the integrated variables.

    The keyword arguments are split by `self._split_observations_and_conditioning`
    into observed values and conditioning values. For every assignment yielded
    by `self._generate_integrated_values()` the variational and the conditioned
    generative are run once, and the resulting log-probabilities are weighted
    by the variational probability of that assignment before being
    accumulated.

    Args:
        check_prior: if truthy, additionally run the generative in
            `GenerativeMode.UNCONDITIONED`, assert that every observed,
            integrated and variational name appears in it, and return its
            result alongside the loss.
        **name_to_observation_or_conditioning: named observed values and
            conditioning values.

    Returns:
        `loss` when `check_prior` is falsy, otherwise the tuple
        `(loss, unconditioned_generative)`.

    Raises:
        AssertionError: on duplicate variable names, on names missing from the
            generative, on an integrated variable that is also observed or has
            no variational distribution, or if the unconditioned generative
            tries to add a loss.
    """

    name_to_observation, name_to_conditioning = self._split_observations_and_conditioning(
        name_to_observation_or_conditioning)

    if check_prior:

        # Build the 'prior', i.e. the generative without variational substitutions
        name_to_unconditioned_generative_rv = {}

        def rv_to_value(rv, name):
            # Record every RV the generative declares and hand back a plain sample.
            assert name not in name_to_unconditioned_generative_rv, 'duplicate variable {} in unconditioned generative'.format(
                name)
            name_to_unconditioned_generative_rv[name] = rv
            return rv.sample()

        def add_loss(_):
            assert False, 'adding a loss in the unconditioned generative is not supported'  # in theory we could make this do the obvious thing (just add the loss), but the behaviour is strange given that all other calls to add_loss are integrated-over

        with tf.name_scope('generative/unconditioned'):
            unconditioned_generative = self.generative(
                _RvBuilder(rv_to_value), GenerativeMode.UNCONDITIONED,
                add_loss, **name_to_conditioning)

        # Every observed name must correspond to an RV the generative declared.
        for name in name_to_observation:
            assert name in name_to_unconditioned_generative_rv, 'observed variable {} not present in generative'.format(
                name)

        # Integrated names must exist in the generative and must not be observed.
        for name in self.integrated_name_to_values:
            assert name in name_to_unconditioned_generative_rv, 'integrated variable {} not present in generative'.format(
                name)
            assert name not in name_to_observation, 'integrated variable {} may not also be observed'.format(
                name)

    if tf.executing_eagerly():
        # Draw one seed up front; it is re-applied at the start of every
        # iteration of the loop below.
        random_seed = int(
            tf.random.uniform([], 0, tf.int32.max, dtype=tf.int32))
    elif len(self.integrated_name_to_values) > 0:
        print(
            'warning: IntegratedEagerKlqp does not reuse randomness correctly in graph mode!'
        )

    # Running sums over all assignments of the integrated variables.
    total_weighted_log_Px = 0.
    total_weighted_log_Pz = 0.
    total_weighted_log_Qz = 0.
    total_weighted_additional_loss = 0.

    for integrated_name_to_value in self._generate_integrated_values():

        # Ensure that we use the same values for all random variables (up to conditioning on the integrated variables) for each
        # value of the integrated-over variables in the summation
        if tf.executing_eagerly():
            tf.random.set_random_seed(
                random_seed
            )  # note that this relies on us actually being in eager mode!

        # Losses registered through add_loss during this iteration only.
        additional_losses = []

        def add_loss(loss):
            additional_losses.append(loss)

        # Build the variational, also using variational samples for ancestral substitutions
        name_to_substituted_value = dict(name_to_observation)
        name_to_variational_rv = {}

        def rv_to_value(rv, name):
            # Variational pass: integrated variables take the value fixed for
            # this iteration; every other variable is sampled from its
            # variational RV.
            if check_prior:
                assert name in name_to_unconditioned_generative_rv, 'variational rv {} not present in generative'.format(
                    name)
            assert name not in name_to_variational_rv, '{} already has variational binding'.format(
                name)
            assert name not in name_to_observation, '{} may not be given by variational, as it is observed'.format(
                name)
            name_to_variational_rv[name] = rv
            if name in integrated_name_to_value:
                substituted_value = integrated_name_to_value[name]
            else:
                substituted_value = rv.sample()
            name_to_substituted_value[name] = substituted_value
            return substituted_value

        with tf.name_scope('variational/conditioned'):
            self.variational(_RvBuilder(rv_to_value), add_loss,
                             **name_to_observation, **name_to_conditioning)

        # Build the 'conditioned generative', with values substituted from the variational and observations
        name_to_conditioned_generative_rv = {}

        def rv_to_value(rv, name):
            # Generative pass: reuse the observed/variational value when one
            # exists; otherwise fall back to sampling from the generative RV.
            assert name not in name_to_conditioned_generative_rv, 'duplicate variable {} in conditioned generative'.format(
                name)
            if name not in name_to_substituted_value:
                assert name not in integrated_name_to_value, 'variable {} is integrated over, but has no variational distribution; this case is not supported'.format(
                    name)
                print(
                    'warning: variable {} has neither variational distribution nor observed value, hence will be marginalised by sampling'
                    .format(name))
                name_to_substituted_value[name] = rv.sample()
            name_to_conditioned_generative_rv[name] = rv
            return name_to_substituted_value[name]

        with tf.name_scope('generative/conditioned'):
            self.generative(_RvBuilder(rv_to_value),
                            GenerativeMode.CONDITIONED, add_loss,
                            **name_to_conditioning)

        def mean_over_nonbatch_axes(x):
            # Average over every axis except the first, leaving rank < 2
            # inputs untouched.
            shape = x.shape
            if len(
                    shape
            ) < 2:  # should never be zero; ideally it is always one, if batch-vs-event indexing of RVs is correct
                return x
            else:
                return tf.reduce_mean(x, axis=tuple(range(1, len(shape))))

        # Generative log-probability of the observed variables.
        log_Px = sum(
            mean_over_nonbatch_axes(
                name_to_conditioned_generative_rv[name].log_prob(
                    name_to_substituted_value[name]))
            for name in name_to_observation)
        # Generative log-probability of the variationally substituted variables.
        log_Pz = sum(
            mean_over_nonbatch_axes(
                name_to_conditioned_generative_rv[name].log_prob(
                    name_to_substituted_value[name]))
            for name in
            name_to_variational_rv  # variational not generative so we only include things with variational (not prior) substitutions
            if name not in name_to_observation  # ...as it's in P(x) instead
        )
        # Variational log-probability of the same substituted values.
        log_Qz = sum(
            mean_over_nonbatch_axes(name_to_variational_rv[name].log_prob(
                name_to_substituted_value[name]))
            for name in name_to_variational_rv)

        # Variational probability of this assignment of the integrated
        # variables; used below as the weight of this term in the summation.
        Q_integrated_values = tf.exp(
            sum([
                mean_over_nonbatch_axes(
                    name_to_variational_rv[name].log_prob(value))
                for name, value in integrated_name_to_value.items()
            ], 0.))  # :: iib

        additional_loss = sum(additional_losses)

        total_weighted_log_Px += tf.reduce_mean(Q_integrated_values * log_Px)
        total_weighted_log_Pz += tf.reduce_mean(Q_integrated_values * log_Pz)
        total_weighted_log_Qz += tf.reduce_mean(Q_integrated_values * log_Qz)
        total_weighted_additional_loss += tf.reduce_mean(
            Q_integrated_values * additional_loss)

    # self.beta may be either a constant or a zero-argument callable.
    beta = self.beta() if callable(self.beta) else self.beta
    loss = -(total_weighted_log_Px + beta *
             (total_weighted_log_Pz - total_weighted_log_Qz)
             ) + total_weighted_additional_loss  # :: iib

    if self.verbose:
        if tf.executing_eagerly(
        ):  # if we can, print with nice formatting (i.e. two decimal places)
            print(
                'log P(x) = {:.2f}, beta * KL= {:.2f} (log P(z) = {:.2f}, log Q(z) = {:.2f}), L* = {:.2f}, total loss = {:.2f}'
                .format(
                    total_weighted_log_Px,
                    beta * (total_weighted_log_Pz - total_weighted_log_Qz),
                    total_weighted_log_Pz, total_weighted_log_Qz,
                    total_weighted_additional_loss, loss))
        else:
            tf.print('log P(x) = ',
                     total_weighted_log_Px,
                     ', beta * KL= ',
                     beta * (total_weighted_log_Pz - total_weighted_log_Qz),
                     ' (log P(z) = ',
                     total_weighted_log_Pz,
                     ', log Q(z) = ',
                     total_weighted_log_Qz,
                     '), L* = ',
                     total_weighted_additional_loss,
                     ', total loss = ',
                     loss,
                     sep='')

    if check_prior:
        return loss, unconditioned_generative
    else:
        return loss
def loss_gradient(self, x: np.ndarray, y: np.ndarray, training_mode: bool = False, **kwargs) -> np.ndarray:
    """
    Compute the gradient of the loss function w.r.t. `x`.

    Without the `sampling` keyword (or with a falsy value) the call is forwarded unchanged to
    `TensorFlowV2Classifier.loss_gradient`. With `sampling` set, every preprocessed input is repeated
    `self.sample_size` times, Gaussian noise with standard deviation `self.scale` is added, and the loss is the
    categorical cross-entropy of the softmax outputs averaged over the noisy copies of each input.

    :param x: Sample input with shape as expected by the model.
    :param y: Correct labels, one-vs-rest encoding.
    :param training_mode: `True` for model set to training mode and `False` for model set to evaluation mode.
    :param sampling: True if loss gradients should be determined with Monte Carlo sampling.
    :type sampling: `bool`
    :return: Array of gradients of the same shape as `x`.
    :raises ValueError: If sampling is requested while TensorFlow is not executing eagerly.
    """
    import tensorflow as tf  # lgtm [py/repeated-import]

    # Deterministic path: defer entirely to the plain classifier implementation.
    if not kwargs.get("sampling"):
        return TensorFlowV2Classifier.loss_gradient(self, x=x, y=y, training_mode=training_mode, **kwargs)

    # Preprocessing runs before the eager-mode check, as in the non-sampling path's contract.
    x_preprocessed, _ = self._apply_preprocessing(x, y, fit=False)

    if not tf.executing_eagerly():  # pragma: no cover
        raise ValueError("Expecting eager execution.")

    with tf.GradientTape() as tape:
        watched_inputs = tf.convert_to_tensor(x_preprocessed)
        tape.watch(watched_inputs)

        # One block of `sample_size` noisy copies per original input.
        repeated_inputs = tf.repeat(watched_inputs, repeats=self.sample_size, axis=0)
        gaussian_noise = tf.random.normal(
            shape=repeated_inputs.shape,
            mean=0.0,
            stddev=self.scale,
            dtype=repeated_inputs.dtype,
            seed=None,
            name=None,
        )
        noisy_inputs = repeated_inputs + gaussian_noise

        if self.clip_values is not None:
            noisy_inputs = tf.clip_by_value(
                noisy_inputs,
                clip_value_min=self.clip_values[0],
                clip_value_max=self.clip_values[1],
                name=None,
            )

        logits = self._model(noisy_inputs, training=training_mode)
        probabilities = tf.nn.softmax(logits, axis=1, name=None)

        # Regroup the noisy copies per input and average their class probabilities.
        grouped_probabilities = tf.reshape(probabilities, shape=(-1, self.sample_size, logits.shape[-1]))
        mean_probabilities = tf.reduce_mean(grouped_probabilities, axis=1)

        per_sample_loss = tf.keras.losses.categorical_crossentropy(
            y_true=y, y_pred=mean_probabilities, from_logits=False, label_smoothing=0
        )
        loss = tf.reduce_mean(per_sample_loss)

    raw_gradients = tape.gradient(loss, watched_inputs).numpy()

    # Propagate the gradients back through the preprocessing steps.
    return self._apply_preprocessing_gradient(x, raw_gradients)
def test_sync_replicas(self, create_gan_model_fn, create_global_step):
    """Checks `tfgan.gan_train_ops` when both optimizers are sync optimizers.

    Verifies that no trainable variables are added, that one sync hook per
    optimizer is attached to the returned train ops, and that running the
    generator and discriminator train ops once each leaves the global step
    unchanged.

    Args:
      create_gan_model_fn: zero-argument callable returning the GAN model
        under test.
      create_global_step: if truthy, registers a custom int32 variable in the
        GLOBAL_STEP collection before the train ops are built.
    """
    if tf.executing_eagerly():
        # None of the usual utilities work in eager.
        return
    model = create_gan_model_fn()
    loss = tfgan.gan_loss(model)
    # Baseline count, compared again after the train ops have been created.
    num_trainable_vars = len(get_trainable_variables())

    if create_global_step:
        gstep = tf.compat.v1.get_variable(
            'custom_gstep', dtype=tf.int32, initializer=0, trainable=False)
        tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.GLOBAL_STEP, gstep)

    g_opt = get_sync_optimizer()
    d_opt = get_sync_optimizer()
    train_ops = tfgan.gan_train_ops(
        model, loss, generator_optimizer=g_opt, discriminator_optimizer=d_opt)
    self.assertIsInstance(train_ops, tfgan.GANTrainOps)
    # No new trainable variables should have been added.
    self.assertLen(get_trainable_variables(), num_trainable_vars)

    # Sync hooks should be populated in the GANTrainOps.
    self.assertLen(train_ops.train_hooks, 2)
    for hook in train_ops.train_hooks:
        self.assertIsInstance(hook, get_sync_optimizer_hook_type())
    # Each hook must wrap exactly one of the two optimizers created above.
    sync_opts = [hook._sync_optimizer for hook in train_ops.train_hooks]
    self.assertSetEqual(frozenset(sync_opts), frozenset((g_opt, d_opt)))

    g_sync_init_op = g_opt.get_init_tokens_op(num_tokens=1)
    d_sync_init_op = d_opt.get_init_tokens_op(num_tokens=1)

    # Check that update op is run properly.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    with self.cached_session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        sess.run(tf.compat.v1.local_variables_initializer())
        sess.run(g_opt.chief_init_op)
        sess.run(d_opt.chief_init_op)

        gstep_before = sess.run(global_step)

        # Start required queue runner for SyncReplicasOptimizer.
        coord = tf.train.Coordinator()
        g_threads = g_opt.get_chief_queue_runner().create_threads(sess, coord)
        d_threads = d_opt.get_chief_queue_runner().create_threads(sess, coord)

        sess.run(g_sync_init_op)
        sess.run(d_sync_init_op)

        sess.run(train_ops.generator_train_op)
        # Check that global step wasn't incremented.
        self.assertEqual(gstep_before, sess.run(global_step))

        sess.run(train_ops.discriminator_train_op)
        # Check that global step wasn't incremented.
        self.assertEqual(gstep_before, sess.run(global_step))

        # Stop the queue-runner threads started above and wait for them.
        coord.request_stop()
        coord.join(g_threads + d_threads)
def testMatrixEvent(self):
    """Checks shapes, sampling, log_prob and prob for a matrix-valued event.

    The same distribution is built twice, once with static Python shapes and
    once with shapes fed through `placeholder_with_default`, and both are
    compared against a NumPy/SciPy reference log-density.
    """
    static_batch = [2]
    static_event = [2, 3, 3]
    dynamic_batch = tf.compat.v1.placeholder_with_default(
        input=np.int32(static_batch), shape=None, name="dynamic_batch_shape")
    dynamic_event = tf.compat.v1.placeholder_with_default(
        input=np.int32(static_event), shape=None, name="dynamic_event_shape")

    scale = 2.
    loc = 0.

    def build_fake_mvn(batch, event):
        # Identical construction for both variants; only the shapes differ.
        return self._cls()(
            distribution=tfd.Normal(loc=loc, scale=scale),
            bijector=DummyMatrixTransform(),
            batch_shape=batch,
            event_shape=event,
            validate_args=True)

    fake_mvn_dynamic = build_fake_mvn(dynamic_batch, dynamic_event)
    fake_mvn_static = build_fake_mvn(static_batch, static_event)

    def actual_mvn_log_prob(x):
        # This distribution is the normal PDF, reduced over the
        # last 3 dimensions + a jacobian term which corresponds
        # to the determinant of x.
        normal_term = np.sum(
            stats.norm(loc, scale).logpdf(x), axis=(-1, -2, -3))
        jacobian_term = np.sum(np.linalg.det(x), axis=-1)
        return normal_term + jacobian_term

    self.assertAllEqual([2, 3, 3], fake_mvn_static.event_shape)
    self.assertAllEqual([2], fake_mvn_static.batch_shape)

    # Static shapes of the placeholder-fed variant are only unknown in graph
    # mode.
    if not tf.executing_eagerly():
        self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.event_shape)
        self.assertAllEqual(tf.TensorShape(None), fake_mvn_dynamic.batch_shape)

    num_samples = int(5e3)
    for dist in (fake_mvn_static, fake_mvn_dynamic):
        # Ensure sample works by checking first, second moments.
        samples = dist.sample(num_samples, seed=0)
        head = samples[0:5, ...]
        (head_, event_shape_, batch_shape_, log_prob_,
         prob_) = self.evaluate([
             head,
             dist.event_shape_tensor(),
             dist.batch_shape_tensor(),
             dist.log_prob(head),
             dist.prob(head),
         ])

        # Ensure all other functions work as intended.
        self.assertAllEqual([5, 2, 2, 3, 3], head_.shape)
        self.assertAllEqual([2, 3, 3], event_shape_)
        self.assertAllEqual([2], batch_shape_)
        expected_log_prob = actual_mvn_log_prob(head_)
        self.assertAllClose(
            expected_log_prob, log_prob_, atol=0., rtol=1e-6)
        self.assertAllClose(
            np.exp(expected_log_prob), prob_, atol=0., rtol=1e-5)
def test_output_type(self, create_fn, expected_tuple_type):
    """Asserts that `create_fn()` returns an `expected_tuple_type` (graph mode only)."""
    # None of the usual utilities work in eager, so the check is skipped there.
    if not tf.executing_eagerly():
        self.assertIsInstance(create_fn(), expected_tuple_type)
def test_eval_count(self):
    """Tests that the evaluation count is reported correctly."""
    # Pick the variant matching the current execution mode, then run it.
    run_check = (self._test_eval_count_eager
                 if tf.executing_eagerly() else self._test_eval_count_graph)
    run_check()
input_filenames_or_glob, sample_rate, FLAGS.debug, FLAGS.embedding_names, FLAGS.embedding_modules, FLAGS.module_output_keys, FLAGS.audio_key, FLAGS.sample_rate_key, FLAGS.label_key, FLAGS.speaker_id_key, FLAGS.average_over_time, FLAGS.delete_audio_from_output, output_filename, input_format=input_format, output_format=output_format, suffix=i) if __name__ == '__main__': flags.mark_flags_as_required([ 'output_filename', 'embedding_names', 'embedding_modules', 'module_output_keys', 'audio_key', 'label_key', ]) flags.mark_flags_as_mutual_exclusive(['input_glob', 'tfds_dataset'], required=True) flags.mark_flags_as_mutual_exclusive( ['tfds_dataset', 'sample_rate_key', 'sample_rate'], required=True) tf.compat.v2.enable_v2_behavior() assert tf.executing_eagerly() app.run(main)
import sys import unittest import tensorflow as tf import tfdiffeq sys.path.insert(0, '..') from tests.problems import construct_problem from tests.check_grad import gradcheck if not tf.executing_eagerly(): tf.enable_v2_behavior() eps = 1e-5 # torch.set_default_dtype(torch.float64) TEST_DEVICE = "gpu:0" if tf.test.is_gpu_available() else "cpu:0" def max_abs(tensor): return tf.reduce_max(tf.abs(tensor)) class TestCollectionState(unittest.TestCase): def test_dopri5(self): f, y0, t_points, sol = construct_problem(TEST_DEVICE) tuple_f = lambda t, y: (f(t, y[0]), f(t, y[1])) tuple_y0 = (y0, y0) tuple_y = tfdiffeq.odeint(tuple_f, tuple_y0, t_points, method='dopri5')
def restore_models_and_optimizers_and_alpha(sess, gen_model, dis_model, mapping_network, sampling_model,
                                            optimizer_g, optimizer_d, optimizer_m, save_paths):
    """
    Restore model weights and optimizer variables from disk, then read the saved alpha and step.

    Every model/optimizer argument may be None, in which case it is skipped. The mapping-network
    optimizer is restored only when `optimizer_m` and `mapping_network` are both given and the
    directory of `save_paths.mn_optim` exists; the same condition is used in eager and graph mode.

    :param sess: session if in graph mode, otherwise unused
    :param gen_model: generator with defined variables
    :param dis_model: discriminator with defined variables
    :param mapping_network: mapping network with defined variables
    :param sampling_model: sampling model with defined variables
    :param optimizer_g: generator's optimizer
    :param optimizer_d: discriminator's optimizer
    :param optimizer_m: mapping network's optimizer
    :param save_paths: paths containing models, optimizers, alpha and step on disk
    :return: tuple `(alpha, step)`; each entry is None when its path in `save_paths` is None
    """
    # Model weights. The path attribute is looked up only for models that are present.
    for model, path_attr in ((gen_model, "gen_model"),
                             (dis_model, "dis_model"),
                             (mapping_network, "mapping_network"),
                             (sampling_model, "sampling_model")):
        if model is not None:
            model.load_weights(getattr(save_paths, path_attr), by_name=True)

    # Single source of truth for the mapping-network optimizer condition. Previously the eager
    # branch omitted the `mapping_network is not None` check and could reference `vars_mn`
    # before assignment.
    restore_mn = (optimizer_m is not None
                  and mapping_network is not None
                  and os.path.exists(os.path.dirname(save_paths.mn_optim)))

    # Select the optimizer variables that are present in each checkpoint.
    vars_g = vars_d = vars_mn = None
    if optimizer_g is not None:
        vars_g = filter_vars_with_checkpoint(chkpt_path=save_paths.gen_optim,
                                             var_list=optimizer_g.variables())
    if optimizer_d is not None:
        vars_d = filter_vars_with_checkpoint(chkpt_path=save_paths.dis_optim,
                                             var_list=optimizer_d.variables())
    if restore_mn:
        vars_mn = filter_vars_with_checkpoint(chkpt_path=save_paths.mn_optim,
                                              var_list=optimizer_m.variables())

    # (variables, checkpoint path) pairs in restore order: discriminator, generator, mapping.
    pending = []
    if optimizer_d is not None:
        pending.append((vars_d, save_paths.dis_optim))
    if optimizer_g is not None:
        pending.append((vars_g, save_paths.gen_optim))
    if restore_mn:
        pending.append((vars_mn, save_paths.mn_optim))

    if pending:
        eager = tf.executing_eagerly()
        for var_list, optim_path in pending:
            checkpoint_dir = os.path.dirname(optim_path)
            if eager:
                saver = tf.contrib.eager.Saver(var_list=var_list)
                saver.restore(file_prefix=tf.train.latest_checkpoint(checkpoint_dir))
            else:
                saver = tf.train.Saver(var_list=var_list)
                saver.restore(sess=sess, save_path=tf.train.latest_checkpoint(checkpoint_dir))

    step = None
    alpha = None
    if save_paths.step is not None:
        with open(save_paths.step, "r") as f:
            step = int(f.read())
    if save_paths.alpha is not None:
        with open(save_paths.alpha, "r") as f:
            alpha = float(f.read())

    return alpha, step
def initialize(self):
    """Return a no-op when executing eagerly, otherwise the stored `self._initializers`."""
    return tf.no_op() if tf.executing_eagerly() else self._initializers
def get_estimator_eval_metric_ops(self, eval_dict):
    """Returns metric ops for use in tf.estimator.EstimatorSpec.

    Args:
      eval_dict: A dictionary that holds an image, groundtruth, and detections
        for a batched example. Only the first example of the batch is used for
        visualization. eval_util.result_dict_for_batched_example() is a
        convenient way to construct such a dictionary. Expected entries:
          fields.InputDataFields.original_image: [batch_size, H, W, 3] image.
          fields.InputDataFields.original_image_spatial_shape: [batch_size, 2]
            tensor containing the size of the original image.
          fields.InputDataFields.true_image_shape: [batch_size, 3] tensor
            containing the spatial size of the unpadded original image.
          fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4]
            float32 tensor with groundtruth boxes in range [0.0, 1.0].
          fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes]
            int64 tensor with 1-indexed groundtruth classes.
          fields.InputDataFields.groundtruth_instance_masks - (optional)
            [batch_size, num_boxes, H, W] int64 tensor with instance masks.
          fields.DetectionResultFields.detection_boxes - [batch_size,
            max_num_boxes, 4] float32 tensor with detection boxes in range
            [0.0, 1.0].
          fields.DetectionResultFields.detection_classes - [batch_size,
            max_num_boxes] int64 tensor with 1-indexed detection classes.
          fields.DetectionResultFields.detection_scores - [batch_size,
            max_num_boxes] float32 tensor with detection scores.
          fields.DetectionResultFields.detection_masks - (optional) [batch_size,
            max_num_boxes, H, W] float32 tensor of binarized masks.
          fields.DetectionResultFields.detection_keypoints - (optional)
            [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with
            keypoints.

    Returns:
      A dictionary mapping image summary names to (value_op, update_op)
      tuples. All entries share one `update_op`, which records a single
      side-by-side image with detections and groundtruth. Each `value_op`
      holds the tf.summary.image string for one image. The dictionary is
      empty when `self._max_examples_to_draw` is 0.
    """
    if self._max_examples_to_draw == 0:
        return {}
    images = self.images_from_evaluation_dict(eval_dict)

    def get_images():
        """Returns the stored images, padded to self._max_examples_to_draw."""
        stored = self._images
        # Pad in place with scalar uint8 placeholders up to the draw limit.
        for _ in range(self._max_examples_to_draw - len(stored)):
            stored.append(np.array(0, dtype=np.uint8))
        self.clear()
        return stored

    def image_summary_or_default_string(summary_name, image):
        """Returns an image summary for rank-4 inputs, an empty string otherwise."""
        # Padding placeholders are scalars, so only real images have rank 4.
        is_rank_four = tf.equal(tf.size(tf.shape(image)), 4)
        return tf.cond(is_rank_four,
                       lambda: tf.summary.image(summary_name, image),
                       lambda: tf.constant(''))

    # Only the first image of the batch is recorded.
    first_image_batch = [[images[0]]]
    if tf.executing_eagerly():
        update_op = self.add_images(first_image_batch)
        image_tensors = get_images()
    else:
        update_op = tf.py_func(self.add_images, first_image_batch, [])
        image_tensors = tf.py_func(
            get_images, [], [tf.uint8] * self._max_examples_to_draw)

    eval_metric_ops = {}
    for index, image_tensor in enumerate(image_tensors):
        name = self._summary_name_prefix + '/' + str(index)
        eval_metric_ops[name] = (
            image_summary_or_default_string(name, image_tensor), update_op)
    return eval_metric_ops
def _testWithMaybeMultiAttention(
        self,
        is_multi,
        create_attention_mechanisms,
        expected_final_output,
        expected_final_state,
        attention_mechanism_depths,
        alignment_history=False,
        expected_final_alignment_history=None,
        attention_layer_sizes=None,
        attention_layers=None,
        create_query_layer=False,
        create_memory_layer=True,
        create_attention_kwargs=None,
):
    """Runs a BasicDecoder over an AttentionWrapper cell and checks the results.

    Builds one attention mechanism per entry of `create_attention_mechanisms`,
    wraps an LSTM cell with them, decodes random inputs, then checks static
    shapes and compares result summaries against the expected values.

    Args:
        is_multi: whether the mechanisms are passed to the wrapper as a list;
            may be True with a single mechanism.
        create_attention_mechanisms: callables that build an attention
            mechanism from `units`, `memory`, `memory_sequence_length` and
            extra keyword arguments.
        expected_final_output: expected summary structure of the decoder
            output.
        expected_final_state: expected summary structure of the final state.
        attention_mechanism_depths: `units` value for each mechanism.
        alignment_history: whether the wrapper records alignment history.
        expected_final_alignment_history: expected summary structure of the
            stacked alignment history; only used when `alignment_history` is
            truthy.
        attention_layer_sizes: optional attention layer sizes for the wrapper.
        attention_layers: optional attention layers for the wrapper.
        create_query_layer: whether to pass a deterministic Dense query layer
            to each mechanism.
        create_memory_layer: whether to pass a deterministic Dense memory
            layer to each mechanism.
        create_attention_kwargs: optional extra keyword arguments for the
            mechanism creators. The caller's dict is not modified.
    """
    # Allow is_multi to be True with a single mechanism to enable test for
    # passing in a single mechanism in a list.
    assert len(create_attention_mechanisms) == 1 or is_multi
    encoder_sequence_length = [3, 2, 3, 1, 1]
    decoder_sequence_length = [2, 0, 1, 2, 3]
    batch_size = 5
    encoder_max_time = 8
    decoder_max_time = 4
    input_depth = 7
    encoder_output_depth = 10
    cell_depth = 9
    # Work on a copy: the loop below adds "query_layer"/"memory_layer" entries
    # and must not leak them into the dict owned by the caller.
    create_attention_kwargs = dict(create_attention_kwargs or {})

    if attention_layer_sizes is not None:
        # Compute sum of attention_layer_sizes. Use encoder_output_depth if
        # None.
        attention_depth = sum(
            attention_layer_size or encoder_output_depth
            for attention_layer_size in attention_layer_sizes)
    elif attention_layers is not None:
        # Compute sum of attention_layers output depth.
        attention_depth = sum(
            attention_layer.compute_output_shape(
                [batch_size, cell_depth +
                 encoder_output_depth]).dims[-1].value
            for attention_layer in attention_layers)
    else:
        attention_depth = encoder_output_depth * len(
            create_attention_mechanisms)

    decoder_inputs = np.random.randn(batch_size, decoder_max_time,
                                     input_depth).astype(np.float32)
    encoder_outputs = np.random.randn(batch_size, encoder_max_time,
                                      encoder_output_depth).astype(
                                          np.float32)

    attention_mechanisms = []
    for creator, depth in zip(create_attention_mechanisms,
                              attention_mechanism_depths):
        # Create a memory layer with deterministic initializer to avoid
        # randomness in the test between graph and eager.
        if create_query_layer:
            create_attention_kwargs["query_layer"] = tf.keras.layers.Dense(
                depth, kernel_initializer="ones", use_bias=False)
        if create_memory_layer:
            create_attention_kwargs[
                "memory_layer"] = tf.keras.layers.Dense(
                    depth, kernel_initializer="ones", use_bias=False)

        attention_mechanisms.append(
            creator(
                units=depth,
                memory=encoder_outputs,
                memory_sequence_length=encoder_sequence_length,
                **create_attention_kwargs,
            ))

    with self.cached_session(use_gpu=True):
        attention_layer_size = attention_layer_sizes
        attention_layer = attention_layers
        if not is_multi:
            # The single-mechanism wrapper API takes scalars, not lists.
            if attention_layer_size is not None:
                attention_layer_size = attention_layer_size[0]
            if attention_layer is not None:
                attention_layer = attention_layer[0]
        cell = tf.keras.layers.LSTMCell(
            cell_depth,
            recurrent_activation="sigmoid",
            kernel_initializer="ones",
            recurrent_initializer="ones",
        )
        cell = wrapper.AttentionWrapper(
            cell,
            attention_mechanisms if is_multi else attention_mechanisms[0],
            attention_layer_size=attention_layer_size,
            alignment_history=alignment_history,
            attention_layer=attention_layer,
        )
        # Give the wrapper's attention layers a fixed-seed initializer.
        if cell._attention_layers is not None:
            for layer in cell._attention_layers:
                layer.kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform(
                    seed=1337)

        sampler = sampler_py.TrainingSampler()
        my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
        initial_state = cell.get_initial_state(dtype=tf.float32,
                                               batch_size=batch_size)
        final_outputs, final_state, _ = my_decoder(
            decoder_inputs,
            initial_state=initial_state,
            sequence_length=decoder_sequence_length,
        )

        self.assertIsInstance(final_outputs,
                              basic_decoder.BasicDecoderOutput)
        self.assertIsInstance(final_state, wrapper.AttentionWrapperState)

        # The time dimension is statically known only when executing eagerly.
        expected_time = (max(decoder_sequence_length)
                         if tf.executing_eagerly() else None)
        self.assertEqual(
            (batch_size, expected_time, attention_depth),
            tuple(final_outputs.rnn_output.get_shape().as_list()),
        )
        self.assertEqual(
            (batch_size, expected_time),
            tuple(final_outputs.sample_id.get_shape().as_list()),
        )

        self.assertEqual(
            (batch_size, attention_depth),
            tuple(final_state.attention.get_shape().as_list()),
        )
        self.assertEqual(
            (batch_size, cell_depth),
            tuple(final_state.cell_state[0].get_shape().as_list()),
        )
        self.assertEqual(
            (batch_size, cell_depth),
            tuple(final_state.cell_state[1].get_shape().as_list()),
        )

        if alignment_history:
            if is_multi:
                # One history TensorArray per mechanism; stack each of them.
                state_alignment_history = []
                for history_array in final_state.alignment_history:
                    history = history_array.stack()
                    self.assertEqual(
                        (expected_time, batch_size, encoder_max_time),
                        tuple(history.get_shape().as_list()),
                    )
                    state_alignment_history.append(history)
                state_alignment_history = tuple(state_alignment_history)
            else:
                state_alignment_history = final_state.alignment_history.stack(
                )
                self.assertEqual(
                    (expected_time, batch_size, encoder_max_time),
                    tuple(state_alignment_history.get_shape().as_list()),
                )
            tf.nest.assert_same_structure(
                cell.state_size,
                cell.get_initial_state(batch_size=batch_size,
                                       dtype=tf.float32),
            )
            # Remove the history from final_state for purposes of the
            # remainder of the tests.
            final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
        else:
            state_alignment_history = ()

        self.evaluate(tf.compat.v1.global_variables_initializer())
        eval_result = self.evaluate({
            "final_outputs":
            final_outputs,
            "final_state":
            final_state,
            "state_alignment_history":
            state_alignment_history,
        })

        # Reduce the evaluated results to summaries before comparing.
        final_output_info = tf.nest.map_structure(
            get_result_summary, eval_result["final_outputs"])
        final_state_info = tf.nest.map_structure(
            get_result_summary, eval_result["final_state"])
        print("final_output_info: ", final_output_info)
        print("final_state_info: ", final_state_info)

        tf.nest.map_structure(self.assertAllCloseOrEqual,
                              expected_final_output, final_output_info)
        tf.nest.map_structure(self.assertAllCloseOrEqual,
                              expected_final_state, final_state_info)
        # by default, the wrapper emits attention as output
        if alignment_history:
            final_alignment_history_info = tf.nest.map_structure(
                get_result_summary, eval_result["state_alignment_history"])
            print("final_alignment_history_info: ",
                  final_alignment_history_info)
            tf.nest.map_structure(
                self.assertAllCloseOrEqual,
                # outputs are batch major but the stacked TensorArray is
                # time major
                expected_final_alignment_history,
                final_alignment_history_info,
            )
# this is all preset automatically from __future__ import absolute_import, division, print_function import os import matplotlib.pyplot as plt import tensorflow as tf import tensorflow.contrib.eager as tfe tf.enable_eager_execution() print("TensorFlow version: {}".format(tf.VERSION)) print("Eager execution: {}".format(tf.executing_eagerly())) # # this can be uploaded by the user train_dataset_url = "http://download.tensorflow.org/data/iris_training.csv" train_dataset_fp = tf.keras.utils.get_file( fname=os.path.basename(train_dataset_url), origin=train_dataset_url) print("Local copy of the dataset file: {}".format(train_dataset_fp)) # # this depends on input - worry about later def parse_csv(line): example_defaults = [[0.], [0.], [0.], [0.], [0]] # sets field types parsed_line = tf.decode_csv(line, example_defaults) # First 4 fields are features, combine into single tensor features = tf.reshape(parsed_line[:-1], shape=(4, ))
#%% import tensorflow as tf import tensorflow.keras as K from tensorflow.keras import layers from tensorflow.keras import preprocessing print('TensorFlow version:', tf.__version__) print('Eager Execution Mode:', tf.executing_eagerly()) print('available GPU:', tf.config.list_physical_devices('GPU')) from tensorflow.python.client import device_lib print('==========================================') print(device_lib.list_local_devices()) tf.debugging.set_log_device_placement(False) #%% from tqdm import tqdm import pandas as pd import numpy as np import math import time import re import matplotlib.pyplot as plt from PIL import Image from pprint import pprint import random from scipy import sparse import os os.chdir('/Users/anseunghwan/Documents/GitHub/textmining') import Modules #%% from matplotlib import rc rc('font', family='AppleGothic')
def test_run_one_train_step(self):
    """Runs `train_lib.train` with the test hparams (graph mode only)."""
    # `tfgan.gan_model` doesn't work when executing eagerly, so only run the
    # training step in graph mode.
    if not tf.executing_eagerly():
        train_lib.train(self.hparams)
def dynamic_decode(decoder,
                   impute_finished=False,
                   maximum_iterations=None,
                   parallel_iterations=32,
                   swap_memory=False,
                   scope=None):
    """Perform dynamic decoding with `decoder`.

    Calls initialize() once and step() repeatedly on the Decoder object.

    Args:
      decoder: A `Decoder` instance.
      impute_finished: Python boolean.  If `True`, then states for batch
        entries which are marked as finished get copied through and the
        corresponding outputs get zeroed out.  This causes some slowdown at
        each time step, but ensures that the final state and outputs have the
        correct values and that backprop ignores time steps that were marked
        as finished.
      maximum_iterations: `int32` scalar, maximum allowed number of decoding
        steps.  Default is `None` (decode until the decoder is fully done).
      parallel_iterations: Argument passed to `tf.while_loop`.
      swap_memory: Argument passed to `tf.while_loop`.
      scope: Optional variable scope to use.

    Returns:
      `final_state.pred_ids`: the `pred_ids` attribute of the final loop
      state, taken after `decoder.finalize` has been applied (when the
      decoder implements it).  The stacked outputs and the final sequence
      lengths are computed and passed to `decoder.finalize`, but are not
      returned.

    Raises:
      TypeError: if `decoder` is not an instance of `Decoder`.
      ValueError: if `maximum_iterations` is provided but is not a scalar.
      ValueError: if called inside an XLA context without
        `maximum_iterations`.
    """
    if not isinstance(decoder, Decoder):
        raise TypeError("Expected decoder to be type Decoder, but saw: %s" %
                        type(decoder))

    with tf.variable_scope(scope, "decoder") as varscope:
        # Determine context types.
        ctxt = tf.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
        is_xla = control_flow_util.GetContainingXLAContext(ctxt) is not None
        in_while_loop = (control_flow_util.GetContainingWhileContext(ctxt)
                         is not None)
        # Properly cache variable values inside the while_loop.
        # Don't set a caching device when running in a loop, since it is
        # possible that train steps could be wrapped in a tf.while_loop. In
        # that scenario caching prevents forward computations in loop
        # iterations from re-reading the updated weights.
        if not tf.executing_eagerly() and not in_while_loop:
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        if maximum_iterations is not None:
            maximum_iterations = tf.convert_to_tensor(
                maximum_iterations, dtype=tf.int32, name="maximum_iterations")
            if maximum_iterations.get_shape().ndims != 0:
                raise ValueError("maximum_iterations must be a scalar")

        initial_finished, initial_inputs, initial_state = decoder.initialize()

        # Zero-valued outputs, used to overwrite emissions of finished
        # entries when `impute_finished` is set.
        zero_outputs = _create_zero_outputs(decoder.output_size,
                                            decoder.output_dtype)

        if is_xla and maximum_iterations is None:
            raise ValueError(
                "maximum_iterations is required for XLA compilation.")
        if maximum_iterations is not None:
            # With a non-positive iteration budget every entry starts out
            # finished, so the loop condition is false from the beginning.
            initial_finished = tf.logical_or(initial_finished,
                                             0 >= maximum_iterations)
        initial_sequence_lengths = tf.zeros_like(initial_finished,
                                                 dtype=tf.int32)
        initial_time = tf.constant(0, dtype=tf.int32)

        # The output TensorArrays are fixed-size only when running under XLA
        # (where `maximum_iterations` is guaranteed to be set by the check
        # above); otherwise they grow dynamically.
        dynamic_size = maximum_iterations is None or not is_xla

        def _create_ta(s, d):
            # One TensorArray per output component, with element shape `s`
            # and dtype `d`.
            return tf.TensorArray(
                dtype=d,
                size=0 if dynamic_size else maximum_iterations,
                dynamic_size=dynamic_size,
                element_shape=s)

        initial_outputs_ta = tf.contrib.framework.nest.map_structure(
            _create_ta, decoder.output_size, decoder.output_dtype)

        def condition(unused_time, unused_outputs_ta, unused_state,
                      unused_inputs, finished, unused_sequence_lengths):
            # Keep looping while at least one entry is not finished.
            return tf.logical_not(tf.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """Internal while_loop body.

            Args:
              time: scalar int32 tensor.
              outputs_ta: structure of TensorArray.
              state: (structure of) state tensors and TensorArrays.
              inputs: (structure of) input tensors.
              finished: bool tensor (keeping track of what's finished).
              sequence_lengths: int32 tensor (keeping track of time of
                finish).

            Returns:
              `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)`.
            """
            (next_outputs, decoder_state, next_inputs,
             decoder_finished) = decoder.step(time, inputs, state)
            if decoder.tracks_own_finished:
                # The decoder's own `finished` value is used as-is.
                next_finished = decoder_finished
            else:
                # Once an entry is finished it stays finished.
                next_finished = tf.logical_or(decoder_finished, finished)
            # Entries that were not finished before this step get length
            # `time + 1`; already-finished entries keep their recorded length.
            next_sequence_lengths = tf.where(
                tf.logical_not(finished),
                tf.fill(tf.shape(sequence_lengths), time + 1),
                sequence_lengths)

            tf.contrib.framework.nest.assert_same_structure(
                state, decoder_state)
            tf.contrib.framework.nest.assert_same_structure(
                outputs_ta, next_outputs)
            tf.contrib.framework.nest.assert_same_structure(
                inputs, next_inputs)

            # Zero out output values past finish
            if impute_finished:
                emit = tf.contrib.framework.nest.map_structure(
                    lambda out, zero: tf.where(finished, zero, out),
                    next_outputs, zero_outputs)
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # TensorArrays and scalar states get passed through.
                if isinstance(cur, tf.TensorArray):
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = (new.shape.ndims == 0)
                return new if pass_through else tf.where(finished, cur, new)

            if impute_finished:
                next_state = tf.contrib.framework.nest.map_structure(
                    _maybe_copy_state, decoder_state, state)
            else:
                next_state = decoder_state

            # Record this step's emission at index `time` in each output
            # TensorArray.
            outputs_ta = tf.contrib.framework.nest.map_structure(
                lambda ta, out: ta.write(time, out), outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, next_inputs,
                    next_finished, next_sequence_lengths)

        res = tf.while_loop(condition,
                            body,
                            loop_vars=(
                                initial_time,
                                initial_outputs_ta,
                                initial_state,
                                initial_inputs,
                                initial_finished,
                                initial_sequence_lengths,
                            ),
                            parallel_iterations=parallel_iterations,
                            maximum_iterations=maximum_iterations,
                            swap_memory=swap_memory)

        # Indices follow the `loop_vars` order above.
        final_outputs_ta = res[1]
        final_state = res[2]
        final_sequence_lengths = res[5]

        final_outputs = tf.contrib.framework.nest.map_structure(
            lambda ta: ta.stack(), final_outputs_ta)

        # `finalize` is optional: a decoder that raises NotImplementedError
        # leaves the outputs and state untouched.
        try:
            final_outputs, final_state = decoder.finalize(
                final_outputs, final_state, final_sequence_lengths)
        except NotImplementedError:
            pass

    return final_state.pred_ids
top_dendrogram=True, row_linkage=lambda x: linkage(x, method='average', metric='correlation'), col_linkage=lambda x: linkage(x.T, method='average', metric='correlation'), histogram=True) #https://threader.app/thread/1105139360226140160 import tensorflow as tf print(tf.__version__) import datetime print(datetime.datetime.now()) tf.keras.backend.clear_session() (x_train, y_train), (x_test, y_test) = dfLFQ.load_data() x_train, x_test = (x_train-min(x_train) / (max(x_train)-min(x_train) , (x_test-min(x_test) / (max(x_test)-min(x_test) print("Eager:",tf.executing_eagerly()) print("GPU:",tf.test.is_gpu_available())#:with tf.device("/gpu:0"): #tf.keras.backend.clear_session() def create_model(): return tf.keras.models.Sequential([ tf.keras.layers.Flatten(input_shape=(28, 28)), tf.keras.layers.Dense(512, activation='relu'), tf.keras.layers.Dropout(0.2), tf.keras.layers.Dense(10, activation='softmax') ]) model = create_model() model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])