def _build_graph_network_for_inferred_shape(self, input_shape, input_dtype=None): if input_shape is None or not self.layers: return if not tf.__internal__.tf2.enabled( ) or not tf.compat.v1.executing_eagerly_outside_functions(): # This behavior is disabled in V1 or when eager execution is disabled. return if (not self._has_explicit_input_shape and not self._use_legacy_deferred_behavior): # Determine whether the input shape is novel, i.e. whether the model # should be rebuilt. input_shape = tuple(input_shape) if self._inferred_input_shape is None: new_shape = input_shape else: new_shape = relax_input_shape(self._inferred_input_shape, input_shape) if (new_shape is not None and new_shape != self._inferred_input_shape): # A novel shape has been received: we need to rebuild the model. # In case we are inside a graph function, we step out of it. with tf.init_scope(): inputs = input_layer.Input(batch_shape=new_shape, dtype=input_dtype, name=self.layers[0].name + '_input') layer_input = inputs created_nodes = set() for layer in self.layers: # Clear nodes previously created via this method. This prevents # node accumulation and ensures that e.g. `layer.output` is # always connected to `model.inputs` # (this is important e.g. for the feature extraction use case). # We don't just do `layer._inbound_nodes = []` in order # not to break shared layers added to Sequential models (which is # technically illegal as per the `add()` docstring, # but wasn't previously disabled). clear_previously_created_nodes(layer, self._created_nodes) try: # Create Functional API connection by calling the current layer layer_output = layer(layer_input) except: # pylint:disable=bare-except # Functional API calls may fail for a number of reasons: # 1) The layer may be buggy. In this case it will be easier for # the user to debug if we fail on the first call on concrete data, # instead of our own call on a symbolic input. # 2) The layer is dynamic (graph-incompatible) and hasn't # overridden `compute_output_shape`. In this case, it is # impossible to build a graph network. # 3) The layer is otherwise incompatible with the Functional API # (e.g. this is the case for some probabilistic layers that rely # on hacks and that do not return tensors). # In all these cases, we should avoid creating a graph network # (or we simply can't). self._use_legacy_deferred_behavior = True return if len(tf.nest.flatten(layer_output)) != 1: raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) # Keep track of nodes just created above track_nodes_created_by_last_call(layer, created_nodes) layer_input = layer_output outputs = layer_output self._created_nodes = created_nodes try: # Initialize a graph Network. This call will never fail for # a stack of valid Keras layers. # However some users have layers that are fundamentally incompatible # with the Functional API, which do not return tensors. In this # case, we fall back to the legacy deferred behavior. # TODO(fchollet): consider raising here, as we should not be # supporting such layers. self._init_graph_network(inputs, outputs) self._graph_initialized = True except: # pylint:disable=bare-except self._use_legacy_deferred_behavior = True self._inferred_input_shape = new_shape
def test_model_with_crossentropy_losses_channels_first(self): """Tests use of all crossentropy losses with `channels_first`. Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`, and `binary_crossentropy`. Verifies that evaluate gives the same result with either `channels_first` or `channels_last` image_data_format. """ def prepare_simple_model(input_tensor, loss_name, target): axis = 1 if K.image_data_format() == 'channels_first' else -1 loss = None num_channels = None activation = None if loss_name == 'sparse_categorical_crossentropy': loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy( # pylint: disable=g-long-lambda y_true, y_pred, axis=axis) num_channels = int(np.amax(target) + 1) activation = 'softmax' elif loss_name == 'categorical_crossentropy': loss = lambda y_true, y_pred: K.categorical_crossentropy( # pylint: disable=g-long-lambda y_true, y_pred, axis=axis) num_channels = target.shape[axis] activation = 'softmax' elif loss_name == 'binary_crossentropy': loss = lambda y_true, y_pred: K.binary_crossentropy( y_true, y_pred) # pylint: disable=unnecessary-lambda num_channels = target.shape[axis] activation = 'sigmoid' predictions = Conv2D(num_channels, 1, activation=activation, kernel_initializer='ones', bias_initializer='ones')(input_tensor) simple_model = training.Model(inputs=input_tensor, outputs=predictions) simple_model.compile(optimizer='rmsprop', loss=loss) return simple_model if tf.test.is_gpu_available(cuda_only=True): with testing_utils.use_gpu(): losses_to_test = [ 'sparse_categorical_crossentropy', 'categorical_crossentropy', 'binary_crossentropy' ] data_channels_first = np.array( [[[[8., 7.1, 0.], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]], dtype=np.float32) # Labels for testing 4-class sparse_categorical_crossentropy, 4-class # categorical_crossentropy, and 2-class binary_crossentropy: labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32), # pylint: disable=line-too-long np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]], [[1, 0, 0], [0, 0, 1], [0, 1, 0]], [[0, 0, 0], [1, 0, 0], [0, 0, 1]], [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32), # pylint: disable=line-too-long np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]], [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)] # pylint: disable=line-too-long # Compute one loss for each loss function in the list `losses_to_test`: loss_channels_last = [0., 0., 0.] loss_channels_first = [0., 0., 0.] old_data_format = K.image_data_format() # Evaluate a simple network with channels last, with all three loss # functions: K.set_image_data_format('channels_last') data = np.moveaxis(data_channels_first, 1, -1) for index, loss_function in enumerate(losses_to_test): labels = np.moveaxis(labels_channels_first[index], 1, -1) inputs = input_layer.Input(shape=(3, 3, 1)) model = prepare_simple_model(inputs, loss_function, labels) loss_channels_last[index] = model.evaluate(x=data, y=labels, batch_size=1, verbose=0) # Evaluate the same network with channels first, with all three loss # functions: K.set_image_data_format('channels_first') data = data_channels_first for index, loss_function in enumerate(losses_to_test): labels = labels_channels_first[index] inputs = input_layer.Input(shape=(1, 3, 3)) model = prepare_simple_model(inputs, loss_function, labels) loss_channels_first[index] = model.evaluate(x=data, y=labels, batch_size=1, verbose=0) K.set_image_data_format(old_data_format) np.testing.assert_allclose( loss_channels_first, loss_channels_last, rtol=1e-06, err_msg='{}{}'.format('Computed different losses for ', 'channels_first and channels_last'))
def add(self, layer): """Adds a layer instance on top of the layer stack. Args: layer: layer instance. Raises: TypeError: If `layer` is not a layer instance. ValueError: In case the `layer` argument does not know its input shape. ValueError: In case the `layer` argument has multiple output tensors, or is already connected somewhere else (forbidden in `Sequential` models). """ # If we are passed a Keras tensor created by keras.Input(), we can extract # the input layer from its keras history and use that without any loss of # generality. if hasattr(layer, '_keras_history'): origin_layer = layer._keras_history[0] if isinstance(origin_layer, input_layer.InputLayer): layer = origin_layer logging.warning( 'Please add `keras.layers.InputLayer` instead of `keras.Input` to ' 'Sequential model. `keras.Input` is intended to be used by ' 'Functional model.') if isinstance(layer, tf.Module): if not isinstance(layer, base_layer.Layer): layer = functional.ModuleWrapper(layer) else: raise TypeError('The added layer must be ' 'an instance of class Layer. ' 'Found: ' + str(layer)) tf_utils.assert_no_legacy_layers([layer]) if not self._is_layer_name_unique(layer): raise ValueError( 'All layers added to a Sequential model ' 'should have unique names. Name "%s" is already the name' ' of a layer in this model. Update the `name` argument ' 'to pass a unique name.' % (layer.name, )) self.built = False set_inputs = False self._maybe_create_attribute('_self_tracked_trackables', []) if not self._self_tracked_trackables: if isinstance(layer, input_layer.InputLayer): # Case where the user passes an Input or InputLayer layer via `add`. set_inputs = True else: batch_shape, dtype = training_utils.get_input_shape_and_dtype( layer) if batch_shape: # Instantiate an input layer. x = input_layer.Input(batch_shape=batch_shape, dtype=dtype, name=layer.name + '_input') # This will build the current layer # and create the node connecting the current layer # to the input layer we just created. layer(x) set_inputs = True if set_inputs: outputs = tf.nest.flatten(layer._inbound_nodes[-1].outputs) if len(outputs) != 1: raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) self.outputs = outputs self.inputs = layer_utils.get_source_inputs(self.outputs[0]) self.built = True self._has_explicit_input_shape = True elif self.outputs: # If the model is being built continuously on top of an input layer: # refresh its output. output_tensor = layer(self.outputs[0]) if len(tf.nest.flatten(output_tensor)) != 1: raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG) self.outputs = [output_tensor] self.built = True if set_inputs or self._graph_initialized: self._init_graph_network(self.inputs, self.outputs) self._graph_initialized = True else: self._self_tracked_trackables.append(layer) self._handle_deferred_layer_dependencies([layer]) self._layer_call_argspecs[layer] = tf_inspect.getfullargspec( layer.call)
def test_TensorBoard_multi_input_output(self): np.random.seed(1337) tmpdir = self.get_temp_dir() self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True) with tf.Graph().as_default(), self.cached_session(): filepath = os.path.join(tmpdir, 'logs') (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( train_samples=TRAIN_SAMPLES, test_samples=TEST_SAMPLES, input_shape=(INPUT_DIM, ), num_classes=NUM_CLASSES) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) def data_generator(train): if train: max_batch_index = len(x_train) // BATCH_SIZE else: max_batch_index = len(x_test) // BATCH_SIZE i = 0 while 1: if train: # simulate multi-input/output models yield ([x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2, [y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2) else: yield ([x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2, [y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] * 2) i += 1 i %= max_batch_index inp1 = input_layer.Input((INPUT_DIM, )) inp2 = input_layer.Input((INPUT_DIM, )) inp = layers.add([inp1, inp2]) hidden = layers.Dense(2, activation='relu')(inp) hidden = layers.Dropout(0.1)(hidden) output1 = layers.Dense(NUM_CLASSES, activation='softmax')(hidden) output2 = layers.Dense(NUM_CLASSES, activation='softmax')(hidden) model = training.Model([inp1, inp2], [output1, output2]) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) # we must generate new callbacks for each test, as they aren't stateless def callbacks_factory(histogram_freq): return [ callbacks_v1.TensorBoard(log_dir=filepath, histogram_freq=histogram_freq, write_images=True, write_grads=True, batch_size=5) ] # fit without validation data model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, callbacks=callbacks_factory(histogram_freq=0), epochs=3) # fit with validation data and accuracy model.fit([x_train] * 2, [y_train] * 2, batch_size=BATCH_SIZE, validation_data=([x_test] * 2, [y_test] * 2), callbacks=callbacks_factory(histogram_freq=1), epochs=2) # fit generator without validation data model.fit_generator(data_generator(True), len(x_train), epochs=2, callbacks=callbacks_factory(histogram_freq=0)) # fit generator with validation data and accuracy model.fit_generator(data_generator(True), len(x_train), epochs=2, validation_data=([x_test] * 2, [y_test] * 2), callbacks=callbacks_factory(histogram_freq=1)) assert os.path.isdir(filepath)
def clone_graph_nodes(inputs, outputs): """Clone the `Node` between the inputs and output tensors. This function is used to create a new functional model from any intermediate keras tensors. The clone of the nodes mimic the behavior of reconstructing the functional graph network by re-executing all the __call__ methods. The cloned nodes will be appended to the layers. Note that a new tf.keras.Inputs will be created for any items in the `inputs` Args: inputs: A nested structure of keras_tensors. outputs: A nested structure of keras_tensors. Returns: A pair of inputs and outputs, with cloned keras_tensors. They can be used to create a new functional model. """ nodes_to_clone = find_nodes_by_inputs_and_outputs(inputs, outputs) cloned_inputs = [] cloned_outputs = [] # We not only need to create copies of Nodes (mimic the calls), also need to # clone keras_tensors to avoid the override of _keras_history attached on # the keras_tensor. The following dict is used to track any keras tensor we # cloned The key is the string ID of the original keras tensor, and value is # the cloned keras_tensor instance. kt_id_mapping = {} for kt_input in tf.nest.flatten(inputs): if kt_input.node.is_input: # For any existing keras_tensor from tf.keras.Input, we leave them # as is. cloned_inputs.append(kt_input) kt_id_mapping[id(kt_input)] = kt_input else: # We need to create a new tf.keras.Input for any intermediate # keras_tensor cpy = _clone_keras_tensor(kt_input) cloned_input = input_layer_module.Input(tensor=cpy) cloned_inputs.append(cloned_input) kt_id_mapping[id(kt_input)] = cloned_input cloned_inputs = tf.nest.pack_sequence_as(inputs, cloned_inputs) for kt_output in tf.nest.flatten(outputs): cpy = _clone_keras_tensor(kt_output) # We reuse the _keras_history here, which contains the old information. # It is used in the Node constructor to check if the tensor # "is_keras_tensor()" The history will be override by the Node # constructor anyway for the corresponding layer output anyway. cpy._keras_history = kt_output._keras_history cloned_outputs.append(cpy) kt_id_mapping[id(kt_output)] = cpy cloned_outputs = tf.nest.pack_sequence_as(outputs, cloned_outputs) for node in nodes_to_clone: # Clone any keras_tensors to avoid override of _keras_history # Or reuse an existing keras_tensor if it has already been cloned. output_copy = clone_keras_tensors(node.output_tensors, kt_id_mapping) call_args_copy = clone_keras_tensors(node.call_args, kt_id_mapping) call_kwargs_copy = clone_keras_tensors(node.call_kwargs, kt_id_mapping) # Creating new nodes based on the existing node information. Node wires # itself to inbound and outbound layers. The Node constructor actually # updates this layer's self._inbound_nodes, sets _keras_history on the # outputs, and adds itself to the `_outbound_nodes` of the layers that # produced the inputs to this layer call. node_module.Node( node.layer, call_args=call_args_copy, call_kwargs=call_kwargs_copy, outputs=output_copy, ) return cloned_inputs, cloned_outputs
def test_build_model_from_intermediate_tensor_with_complicated_model(self): # The topology is like below: # input1 -> dense1 -> a # + -> c - + --> d - + --> output # input2 -> dense1 -> b -------^ ^ # input3 -> dense2 -> e -----------------| batch_size = 8 input1 = input_layer_lib.Input((2, )) input2 = input_layer_lib.Input((2, )) input3 = input_layer_lib.Input((8, )) dense1 = layers.Dense(8, name="dense1") dense2 = layers.Dense(8, name="dense2") # dense1 are shared between input1 and input2 a = dense1(input1) b = dense1(input2) c = layers.Add()([a, b]) # d has a residual connection from b. d = layers.Add()([b, c]) e = dense2(input3) output = layers.Add()([d, e]) # We skip the input2 here and use b instead. model = models.Model([input1, b, input3], output) # Make sure we have 8 layers, 3 for inputs, 2 for dense and 3 for Add. # Note that dense1 is still in use by input1. self.assertLen(model.layers, 8) # Since the layers are not ordered, let's check class of the layers to # make sure it match the expectation. class_count = collections.Counter([l.__class__ for l in model.layers]) self.assertEqual(class_count[input_layer_lib.InputLayer], 3) self.assertEqual(class_count[layers.Dense], 2) self.assertEqual(class_count[layers.Add], 3) model.compile("rmsprop", "mse") model.fit( [ np.random.randn(batch_size, 2), np.random.randn(batch_size, 8), # The shape of b is (batch, 8) np.random.randn(batch_size, 8), ], np.random.randn(batch_size, 8), ) output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model") model.save(output_path, save_format="tf") loaded_model = models.load_model(output_path) self.assertEqual(model.summary(), loaded_model.summary()) model2 = models.Model([a, b], d) # 2 input layers and 2 Add layer. self.assertLen(model2.layers, 4) class_count = collections.Counter([l.__class__ for l in model2.layers]) self.assertEqual(class_count[input_layer_lib.InputLayer], 2) self.assertEqual(class_count[layers.Add], 2) model2.compile("rmsprop", "mse") model2.fit( [np.random.randn(batch_size, 8), np.random.randn(batch_size, 8)], np.random.randn(batch_size, 8), )
def test_model_with_crossentropy_losses_channels_first(self): """Tests use of all crossentropy losses with `channels_first`. Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`, and `binary_crossentropy`. Verifies that evaluate gives the same result with either `channels_first` or `channels_last` image_data_format. """ def prepare_simple_model(input_tensor, loss_name, target): axis = 1 if backend.image_data_format() == "channels_first" else -1 loss = None num_channels = None activation = None if loss_name == "sparse_categorical_crossentropy": loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy( # noqa: E501 y_true, y_pred, axis=axis) num_channels = int(np.amax(target) + 1) activation = "softmax" elif loss_name == "categorical_crossentropy": loss = lambda y_true, y_pred: backend.categorical_crossentropy( y_true, y_pred, axis=axis) num_channels = target.shape[axis] activation = "softmax" elif loss_name == "binary_crossentropy": loss = lambda y_true, y_pred: backend.binary_crossentropy( y_true, y_pred) num_channels = target.shape[axis] activation = "sigmoid" predictions = Conv2D( num_channels, 1, activation=activation, kernel_initializer="ones", bias_initializer="ones", )(input_tensor) simple_model = training.Model(inputs=input_tensor, outputs=predictions) simple_model.compile(optimizer="rmsprop", loss=loss) return simple_model if tf.test.is_gpu_available(cuda_only=True): with test_utils.use_gpu(): losses_to_test = [ "sparse_categorical_crossentropy", "categorical_crossentropy", "binary_crossentropy", ] data_channels_first = np.array( [[[[8.0, 7.1, 0.0], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]], dtype=np.float32, ) # Labels for testing 4-class sparse_categorical_crossentropy, # 4-class categorical_crossentropy, and 2-class # binary_crossentropy: labels_channels_first = [ np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32), np.array( [[ [[0, 1, 0], [0, 1, 0], [0, 0, 0]], [[1, 0, 0], [0, 0, 1], [0, 1, 0]], [[0, 0, 0], [1, 0, 0], [0, 0, 1]], [[0, 0, 1], [0, 0, 0], [1, 0, 0]], ]], dtype=np.float32, ), np.array( [[ [[0, 1, 0], [0, 1, 0], [0, 0, 1]], [[1, 0, 1], [1, 0, 1], [1, 1, 0]], ]], dtype=np.float32, ), ] # Compute one loss for each loss function in the list # `losses_to_test`: loss_channels_last = [0.0, 0.0, 0.0] loss_channels_first = [0.0, 0.0, 0.0] old_data_format = backend.image_data_format() # Evaluate a simple network with channels last, with all three # loss functions: backend.set_image_data_format("channels_last") data = np.moveaxis(data_channels_first, 1, -1) for index, loss_function in enumerate(losses_to_test): labels = np.moveaxis(labels_channels_first[index], 1, -1) inputs = input_layer.Input(shape=(3, 3, 1)) model = prepare_simple_model(inputs, loss_function, labels) loss_channels_last[index] = model.evaluate(x=data, y=labels, batch_size=1, verbose=0) # Evaluate the same network with channels first, with all three # loss functions: backend.set_image_data_format("channels_first") data = data_channels_first for index, loss_function in enumerate(losses_to_test): labels = labels_channels_first[index] inputs = input_layer.Input(shape=(1, 3, 3)) model = prepare_simple_model(inputs, loss_function, labels) loss_channels_first[index] = model.evaluate(x=data, y=labels, batch_size=1, verbose=0) backend.set_image_data_format(old_data_format) np.testing.assert_allclose( loss_channels_first, loss_channels_last, rtol=1e-06, err_msg="{}{}".format( "Computed different losses for ", "channels_first and channels_last", ), )