Example #1
def _build_graph_network_for_inferred_shape(self,
                                            input_shape,
                                            input_dtype=None):
     if input_shape is None or not self.layers:
         return
    if (not tf.__internal__.tf2.enabled()
            or not tf.compat.v1.executing_eagerly_outside_functions()):
         # This behavior is disabled in V1 or when eager execution is disabled.
         return
     if (not self._has_explicit_input_shape
             and not self._use_legacy_deferred_behavior):
         # Determine whether the input shape is novel, i.e. whether the model
         # should be rebuilt.
         input_shape = tuple(input_shape)
         if self._inferred_input_shape is None:
             new_shape = input_shape
         else:
             new_shape = relax_input_shape(self._inferred_input_shape,
                                           input_shape)
         if (new_shape is not None
                 and new_shape != self._inferred_input_shape):
             # A novel shape has been received: we need to rebuild the model.
             # In case we are inside a graph function, we step out of it.
             with tf.init_scope():
                 inputs = input_layer.Input(batch_shape=new_shape,
                                            dtype=input_dtype,
                                            name=self.layers[0].name +
                                            '_input')
                 layer_input = inputs
                 created_nodes = set()
                 for layer in self.layers:
                     # Clear nodes previously created via this method. This prevents
                     # node accumulation and ensures that e.g. `layer.output` is
                     # always connected to `model.inputs`
                     # (this is important e.g. for the feature extraction use case).
                     # We don't just do `layer._inbound_nodes = []` in order
                     # not to break shared layers added to Sequential models (which is
                     # technically illegal as per the `add()` docstring,
                     # but wasn't previously disabled).
                     clear_previously_created_nodes(layer,
                                                    self._created_nodes)
                     try:
                         # Create Functional API connection by calling the current layer
                         layer_output = layer(layer_input)
                     except:  # pylint:disable=bare-except
                         # Functional API calls may fail for a number of reasons:
                         # 1) The layer may be buggy. In this case it will be easier for
                         # the user to debug if we fail on the first call on concrete data,
                         # instead of our own call on a symbolic input.
                         # 2) The layer is dynamic (graph-incompatible) and hasn't
                         # overridden `compute_output_shape`. In this case, it is
                         # impossible to build a graph network.
                         # 3) The layer is otherwise incompatible with the Functional API
                         # (e.g. this is the case for some probabilistic layers that rely
                         # on hacks and that do not return tensors).
                         # In all these cases, we should avoid creating a graph network
                         # (or we simply can't).
                         self._use_legacy_deferred_behavior = True
                         return
                     if len(tf.nest.flatten(layer_output)) != 1:
                         raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
                     # Keep track of nodes just created above
                     track_nodes_created_by_last_call(layer, created_nodes)
                     layer_input = layer_output
                     outputs = layer_output
                 self._created_nodes = created_nodes
                 try:
                     # Initialize a graph Network. This call will never fail for
                     # a stack of valid Keras layers.
                     # However some users have layers that are fundamentally incompatible
                     # with the Functional API, which do not return tensors. In this
                     # case, we fall back to the legacy deferred behavior.
                     # TODO(fchollet): consider raising here, as we should not be
                     # supporting such layers.
                     self._init_graph_network(inputs, outputs)
                     self._graph_initialized = True
                 except:  # pylint:disable=bare-except
                     self._use_legacy_deferred_behavior = True
             self._inferred_input_shape = new_shape
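For context, `relax_input_shape` (called above) computes the shape used for the rebuild. A minimal sketch of its behavior, assuming it mirrors the Keras helper of the same name: dimensions that agree are kept, dimensions that differ are relaxed to None.

def relax_input_shape(shape_1, shape_2):
    # Sketch (assumption): keep matching dimensions, relax mismatches to
    # None, and give up (return None) if either shape is unknown or the
    # ranks differ.
    if shape_1 is None or shape_2 is None:
        return None
    if len(shape_1) != len(shape_2):
        return None
    return tuple(None if d1 != d2 else d1
                 for d1, d2 in zip(shape_1, shape_2))

# Example: a model first called with batch shape (32, 10) and later with
# (64, 10) is rebuilt once with the relaxed shape (None, 10).
assert relax_input_shape((32, 10), (64, 10)) == (None, 10)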
Example #2
    def test_model_with_crossentropy_losses_channels_first(self):
        """Tests use of all crossentropy losses with `channels_first`.

    Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
    and `binary_crossentropy`.
    Verifies that evaluate gives the same result with either `channels_first`
    or `channels_last` image_data_format.
    """
        def prepare_simple_model(input_tensor, loss_name, target):
            axis = 1 if K.image_data_format() == 'channels_first' else -1
            loss = None
            num_channels = None
            activation = None
            if loss_name == 'sparse_categorical_crossentropy':
                loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
                    y_true,
                    y_pred,
                    axis=axis)
                num_channels = int(np.amax(target) + 1)
                activation = 'softmax'
            elif loss_name == 'categorical_crossentropy':
                loss = lambda y_true, y_pred: K.categorical_crossentropy(  # pylint: disable=g-long-lambda
                    y_true,
                    y_pred,
                    axis=axis)
                num_channels = target.shape[axis]
                activation = 'softmax'
            elif loss_name == 'binary_crossentropy':
                loss = lambda y_true, y_pred: K.binary_crossentropy(
                    y_true, y_pred)  # pylint: disable=unnecessary-lambda
                num_channels = target.shape[axis]
                activation = 'sigmoid'

            predictions = Conv2D(num_channels,
                                 1,
                                 activation=activation,
                                 kernel_initializer='ones',
                                 bias_initializer='ones')(input_tensor)
            simple_model = training.Model(inputs=input_tensor,
                                          outputs=predictions)
            simple_model.compile(optimizer='rmsprop', loss=loss)
            return simple_model

        if tf.test.is_gpu_available(cuda_only=True):
            with testing_utils.use_gpu():
                losses_to_test = [
                    'sparse_categorical_crossentropy',
                    'categorical_crossentropy', 'binary_crossentropy'
                ]

                data_channels_first = np.array(
                    [[[[8., 7.1, 0.], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]],
                    dtype=np.float32)
                # Labels for testing 4-class sparse_categorical_crossentropy, 4-class
                # categorical_crossentropy, and 2-class binary_crossentropy:
                labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                         np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                                                    [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                                                    [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                                                    [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]], dtype=np.float32),  # pylint: disable=line-too-long
                                         np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                                                    [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]], dtype=np.float32)]  # pylint: disable=line-too-long
                # Compute one loss for each loss function in the list `losses_to_test`:
                loss_channels_last = [0., 0., 0.]
                loss_channels_first = [0., 0., 0.]

                old_data_format = K.image_data_format()

                # Evaluate a simple network with channels last, with all three loss
                # functions:
                K.set_image_data_format('channels_last')
                data = np.moveaxis(data_channels_first, 1, -1)
                for index, loss_function in enumerate(losses_to_test):
                    labels = np.moveaxis(labels_channels_first[index], 1, -1)
                    inputs = input_layer.Input(shape=(3, 3, 1))
                    model = prepare_simple_model(inputs, loss_function, labels)
                    loss_channels_last[index] = model.evaluate(x=data,
                                                               y=labels,
                                                               batch_size=1,
                                                               verbose=0)

                # Evaluate the same network with channels first, with all three loss
                # functions:
                K.set_image_data_format('channels_first')
                data = data_channels_first
                for index, loss_function in enumerate(losses_to_test):
                    labels = labels_channels_first[index]
                    inputs = input_layer.Input(shape=(1, 3, 3))
                    model = prepare_simple_model(inputs, loss_function, labels)
                    loss_channels_first[index] = model.evaluate(x=data,
                                                                y=labels,
                                                                batch_size=1,
                                                                verbose=0)

                K.set_image_data_format(old_data_format)

                np.testing.assert_allclose(
                    loss_channels_first,
                    loss_channels_last,
                    rtol=1e-06,
                    err_msg='{}{}'.format('Computed different losses for ',
                                          'channels_first and channels_last'))
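The format conversion in the test above boils down to moving the channel axis, which is worth seeing in isolation. A minimal standalone sketch using only NumPy:

import numpy as np

# channels_first data has shape (batch, channels, height, width);
# np.moveaxis(data, 1, -1) moves the channel axis to the end, giving the
# channels_last layout (batch, height, width, channels).
data_channels_first = np.zeros((1, 1, 3, 3), dtype=np.float32)
data_channels_last = np.moveaxis(data_channels_first, 1, -1)
assert data_channels_last.shape == (1, 3, 3, 1)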
Example #3
    def add(self, layer):
        """Adds a layer instance on top of the layer stack.

    Args:
        layer: layer instance.

    Raises:
        TypeError: If `layer` is not a layer instance.
        ValueError: In case the `layer` argument does not
            know its input shape.
        ValueError: In case the `layer` argument has
            multiple output tensors, or is already connected
            somewhere else (forbidden in `Sequential` models).
    """
        # If we are passed a Keras tensor created by keras.Input(), we can extract
        # the input layer from its keras history and use that without any loss of
        # generality.
        if hasattr(layer, '_keras_history'):
            origin_layer = layer._keras_history[0]
            if isinstance(origin_layer, input_layer.InputLayer):
                layer = origin_layer
                logging.warning(
                    'Please add `keras.layers.InputLayer` instead of `keras.Input` to '
                    'Sequential model. `keras.Input` is intended to be used by '
                    'Functional model.')

        if isinstance(layer, tf.Module):
            if not isinstance(layer, base_layer.Layer):
                layer = functional.ModuleWrapper(layer)
        else:
            raise TypeError('The added layer must be '
                            'an instance of class Layer. '
                            'Found: ' + str(layer))

        tf_utils.assert_no_legacy_layers([layer])
        if not self._is_layer_name_unique(layer):
            raise ValueError(
                'All layers added to a Sequential model '
                'should have unique names. Name "%s" is already the name'
                ' of a layer in this model. Update the `name` argument '
                'to pass a unique name.' % (layer.name, ))

        self.built = False
        set_inputs = False
        self._maybe_create_attribute('_self_tracked_trackables', [])
        if not self._self_tracked_trackables:
            if isinstance(layer, input_layer.InputLayer):
                # Case where the user passes an Input or InputLayer layer via `add`.
                set_inputs = True
            else:
                batch_shape, dtype = training_utils.get_input_shape_and_dtype(
                    layer)
                if batch_shape:
                    # Instantiate an input layer.
                    x = input_layer.Input(batch_shape=batch_shape,
                                          dtype=dtype,
                                          name=layer.name + '_input')
                    # This will build the current layer
                    # and create the node connecting the current layer
                    # to the input layer we just created.
                    layer(x)
                    set_inputs = True

            if set_inputs:
                outputs = tf.nest.flatten(layer._inbound_nodes[-1].outputs)
                if len(outputs) != 1:
                    raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
                self.outputs = outputs
                self.inputs = layer_utils.get_source_inputs(self.outputs[0])
                self.built = True
                self._has_explicit_input_shape = True

        elif self.outputs:
            # If the model is being built continuously on top of an input layer:
            # refresh its output.
            output_tensor = layer(self.outputs[0])
            if len(tf.nest.flatten(output_tensor)) != 1:
                raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
            self.outputs = [output_tensor]
            self.built = True

        if set_inputs or self._graph_initialized:
            self._init_graph_network(self.inputs, self.outputs)
            self._graph_initialized = True
        else:
            self._self_tracked_trackables.append(layer)
            self._handle_deferred_layer_dependencies([layer])

        self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(
            layer.call)
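A minimal usage sketch of the `add` behavior implemented above (standard tf.keras API): adding an input layer first gives `add` an explicit input shape, so the graph network is built immediately; without one, building is deferred.

import tensorflow as tf

model = tf.keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(4,)))  # explicit input shape
model.add(tf.keras.layers.Dense(8, activation='relu'))   # connected to inputs
assert model.built  # the graph network was initialized eagerly

deferred = tf.keras.Sequential()
deferred.add(tf.keras.layers.Dense(8))  # no input shape yet: build deferred
assert not deferred.built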
Example #4
    def test_TensorBoard_multi_input_output(self):
        np.random.seed(1337)
        tmpdir = self.get_temp_dir()
        self.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)

        with tf.Graph().as_default(), self.cached_session():
            filepath = os.path.join(tmpdir, 'logs')

            (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
                train_samples=TRAIN_SAMPLES,
                test_samples=TEST_SAMPLES,
                input_shape=(INPUT_DIM, ),
                num_classes=NUM_CLASSES)
            y_test = np_utils.to_categorical(y_test)
            y_train = np_utils.to_categorical(y_train)

            def data_generator(train):
                if train:
                    max_batch_index = len(x_train) // BATCH_SIZE
                else:
                    max_batch_index = len(x_test) // BATCH_SIZE
                i = 0
                while 1:
                    if train:
                        # simulate multi-input/output models
                        yield ([x_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] *
                               2,
                               [y_train[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] *
                               2)
                    else:
                        yield ([x_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] *
                               2,
                               [y_test[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]] *
                               2)
                    i += 1
                    i %= max_batch_index

            inp1 = input_layer.Input((INPUT_DIM, ))
            inp2 = input_layer.Input((INPUT_DIM, ))
            inp = layers.add([inp1, inp2])
            hidden = layers.Dense(2, activation='relu')(inp)
            hidden = layers.Dropout(0.1)(hidden)
            output1 = layers.Dense(NUM_CLASSES, activation='softmax')(hidden)
            output2 = layers.Dense(NUM_CLASSES, activation='softmax')(hidden)
            model = training.Model([inp1, inp2], [output1, output2])
            model.compile(loss='categorical_crossentropy',
                          optimizer='sgd',
                          metrics=['accuracy'])

            # we must generate new callbacks for each test, as they aren't stateless
            def callbacks_factory(histogram_freq):
                return [
                    callbacks_v1.TensorBoard(log_dir=filepath,
                                             histogram_freq=histogram_freq,
                                             write_images=True,
                                             write_grads=True,
                                             batch_size=5)
                ]

            # fit without validation data
            model.fit([x_train] * 2, [y_train] * 2,
                      batch_size=BATCH_SIZE,
                      callbacks=callbacks_factory(histogram_freq=0),
                      epochs=3)

            # fit with validation data and accuracy
            model.fit([x_train] * 2, [y_train] * 2,
                      batch_size=BATCH_SIZE,
                      validation_data=([x_test] * 2, [y_test] * 2),
                      callbacks=callbacks_factory(histogram_freq=1),
                      epochs=2)

            # fit generator without validation data
            model.fit_generator(data_generator(True),
                                len(x_train),
                                epochs=2,
                                callbacks=callbacks_factory(histogram_freq=0))

            # fit generator with validation data and accuracy
            model.fit_generator(data_generator(True),
                                len(x_train),
                                epochs=2,
                                validation_data=([x_test] * 2, [y_test] * 2),
                                callbacks=callbacks_factory(histogram_freq=1))
            assert os.path.isdir(filepath)
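The batching pattern used by `data_generator` above, extracted as a standalone sketch:

import numpy as np

def multi_io_generator(x, y, batch_size):
    # Cycles through the data forever, yielding duplicated input and target
    # batches to simulate a two-input, two-output model.
    max_batch_index = len(x) // batch_size
    i = 0
    while True:
        xb = x[i * batch_size:(i + 1) * batch_size]
        yb = y[i * batch_size:(i + 1) * batch_size]
        yield [xb] * 2, [yb] * 2
        i = (i + 1) % max_batch_index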
Example #5
def clone_graph_nodes(inputs, outputs):
    """Clone the `Node` between the inputs and output tensors.

    This function is used to create a new functional model from any
    intermediate keras tensors. The cloned nodes mimic the behavior of
    reconstructing the functional graph network by re-executing all of the
    __call__ methods. The cloned nodes will be appended to the layers.

    Note that a new tf.keras.Input will be created for any item in `inputs`
    that is not already an input tensor.
    Args:
      inputs: A nested structure of keras_tensors.
      outputs: A nested structure of keras_tensors.

    Returns:
      A pair of inputs and outputs, with cloned keras_tensors. They can be used
      to create a new functional model.
    """
    nodes_to_clone = find_nodes_by_inputs_and_outputs(inputs, outputs)
    cloned_inputs = []
    cloned_outputs = []
    # We not only need to create copies of the Nodes (to mimic the calls),
    # but also need to clone the keras_tensors to avoid overriding the
    # _keras_history attached to them. The following dict tracks every
    # keras_tensor we have cloned: the key is the `id()` of the original
    # keras_tensor, and the value is the cloned keras_tensor instance.
    kt_id_mapping = {}

    for kt_input in tf.nest.flatten(inputs):
        if kt_input.node.is_input:
            # For any existing keras_tensor from tf.keras.Input, we leave them
            # as is.
            cloned_inputs.append(kt_input)
            kt_id_mapping[id(kt_input)] = kt_input
        else:
            # We need to create a new tf.keras.Input for any intermediate
            # keras_tensor
            cpy = _clone_keras_tensor(kt_input)
            cloned_input = input_layer_module.Input(tensor=cpy)
            cloned_inputs.append(cloned_input)
            kt_id_mapping[id(kt_input)] = cloned_input
    cloned_inputs = tf.nest.pack_sequence_as(inputs, cloned_inputs)

    for kt_output in tf.nest.flatten(outputs):
        cpy = _clone_keras_tensor(kt_output)
        # We reuse the _keras_history here, which contains the old
        # information. It is used in the Node constructor to check whether
        # the tensor "is_keras_tensor()". The history will be overridden by
        # the Node constructor for the corresponding layer output anyway.
        cpy._keras_history = kt_output._keras_history
        cloned_outputs.append(cpy)
        kt_id_mapping[id(kt_output)] = cpy
    cloned_outputs = tf.nest.pack_sequence_as(outputs, cloned_outputs)

    for node in nodes_to_clone:
        # Clone any keras_tensors to avoid overriding their _keras_history,
        # or reuse an existing keras_tensor if it has already been cloned.
        output_copy = clone_keras_tensors(node.output_tensors, kt_id_mapping)
        call_args_copy = clone_keras_tensors(node.call_args, kt_id_mapping)
        call_kwargs_copy = clone_keras_tensors(node.call_kwargs, kt_id_mapping)
        # Creating new nodes based on the existing node information.  Node wires
        # itself to inbound and outbound layers.  The Node constructor actually
        # updates this layer's self._inbound_nodes, sets _keras_history on the
        # outputs, and adds itself to the `_outbound_nodes` of the layers that
        # produced the inputs to this layer call.
        node_module.Node(
            node.layer,
            call_args=call_args_copy,
            call_kwargs=call_kwargs_copy,
            outputs=output_copy,
        )
    return cloned_inputs, cloned_outputs
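A sketch of the user-facing scenario `clone_graph_nodes` enables, assuming a recent tf.keras version that supports building a functional model from intermediate tensors:

import tensorflow as tf

inputs = tf.keras.Input(shape=(4,))
hidden = tf.keras.layers.Dense(8)(inputs)
outputs = tf.keras.layers.Dense(2)(hidden)

# Building a model from the intermediate tensor `hidden`: internally the
# nodes between `hidden` and `outputs` are cloned, and a fresh Input is
# created for `hidden`, as described above.
sub_model = tf.keras.Model(inputs=hidden, outputs=outputs)
sub_model(tf.zeros((1, 8)))  # the sub-model now takes an 8-dim input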
Example #6

    def test_build_model_from_intermediate_tensor_with_complicated_model(self):
        # The topology is like below:
        # input1 -> dense1 -> a
        #                     + -> c - + --> d - + --> output
        # input2 -> dense1 -> b -------^         ^
        # input3 -> dense2 -> e -----------------|
        batch_size = 8
        input1 = input_layer_lib.Input((2, ))
        input2 = input_layer_lib.Input((2, ))
        input3 = input_layer_lib.Input((8, ))

        dense1 = layers.Dense(8, name="dense1")
        dense2 = layers.Dense(8, name="dense2")

        # dense1 is shared between input1 and input2
        a = dense1(input1)
        b = dense1(input2)

        c = layers.Add()([a, b])
        # d has a residual connection from b.
        d = layers.Add()([b, c])
        e = dense2(input3)
        output = layers.Add()([d, e])

        # We skip input2 here and use b instead.
        model = models.Model([input1, b, input3], output)
        # Make sure we have 8 layers, 3 for inputs, 2 for dense and 3 for Add.
        # Note that dense1 is still in use by input1.
        self.assertLen(model.layers, 8)
        # Since the layers are not ordered, check the classes of the layers
        # to make sure they match the expectation.
        class_count = collections.Counter([l.__class__ for l in model.layers])
        self.assertEqual(class_count[input_layer_lib.InputLayer], 3)
        self.assertEqual(class_count[layers.Dense], 2)
        self.assertEqual(class_count[layers.Add], 3)

        model.compile("rmsprop", "mse")
        model.fit(
            [
                np.random.randn(batch_size, 2),
                np.random.randn(batch_size, 8),  # The shape of b is (batch, 8)
                np.random.randn(batch_size, 8),
            ],
            np.random.randn(batch_size, 8),
        )
        output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
        model.save(output_path, save_format="tf")
        loaded_model = models.load_model(output_path)
        self.assertEqual(model.summary(), loaded_model.summary())

        model2 = models.Model([a, b], d)
        # 2 input layers and 2 Add layers.
        self.assertLen(model2.layers, 4)
        class_count = collections.Counter([l.__class__ for l in model2.layers])
        self.assertEqual(class_count[input_layer_lib.InputLayer], 2)
        self.assertEqual(class_count[layers.Add], 2)

        model2.compile("rmsprop", "mse")
        model2.fit(
            [np.random.randn(batch_size, 8),
             np.random.randn(batch_size, 8)],
            np.random.randn(batch_size, 8),
        )
Example #7
    def test_model_with_crossentropy_losses_channels_first(self):
        """Tests use of all crossentropy losses with `channels_first`.

        Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`,
        and `binary_crossentropy`.
        Verifies that evaluate gives the same result with either
        `channels_first` or `channels_last` image_data_format.
        """
        def prepare_simple_model(input_tensor, loss_name, target):
            axis = 1 if backend.image_data_format() == "channels_first" else -1
            loss = None
            num_channels = None
            activation = None
            if loss_name == "sparse_categorical_crossentropy":
                loss = lambda y_true, y_pred: backend.sparse_categorical_crossentropy(  # noqa: E501
                    y_true, y_pred, axis=axis)
                num_channels = int(np.amax(target) + 1)
                activation = "softmax"
            elif loss_name == "categorical_crossentropy":
                loss = lambda y_true, y_pred: backend.categorical_crossentropy(
                    y_true, y_pred, axis=axis)
                num_channels = target.shape[axis]
                activation = "softmax"
            elif loss_name == "binary_crossentropy":
                loss = lambda y_true, y_pred: backend.binary_crossentropy(
                    y_true, y_pred)
                num_channels = target.shape[axis]
                activation = "sigmoid"

            predictions = Conv2D(
                num_channels,
                1,
                activation=activation,
                kernel_initializer="ones",
                bias_initializer="ones",
            )(input_tensor)
            simple_model = training.Model(inputs=input_tensor,
                                          outputs=predictions)
            simple_model.compile(optimizer="rmsprop", loss=loss)
            return simple_model

        if tf.test.is_gpu_available(cuda_only=True):
            with test_utils.use_gpu():
                losses_to_test = [
                    "sparse_categorical_crossentropy",
                    "categorical_crossentropy",
                    "binary_crossentropy",
                ]

                data_channels_first = np.array(
                    [[[[8.0, 7.1, 0.0], [4.5, 2.6, 0.55], [0.9, 4.2, 11.2]]]],
                    dtype=np.float32,
                )
                # Labels for testing 4-class sparse_categorical_crossentropy,
                # 4-class categorical_crossentropy, and 2-class
                # binary_crossentropy:
                labels_channels_first = [
                    np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]],
                             dtype=np.float32),
                    np.array(
                        [[
                            [[0, 1, 0], [0, 1, 0], [0, 0, 0]],
                            [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                            [[0, 0, 0], [1, 0, 0], [0, 0, 1]],
                            [[0, 0, 1], [0, 0, 0], [1, 0, 0]],
                        ]],
                        dtype=np.float32,
                    ),
                    np.array(
                        [[
                            [[0, 1, 0], [0, 1, 0], [0, 0, 1]],
                            [[1, 0, 1], [1, 0, 1], [1, 1, 0]],
                        ]],
                        dtype=np.float32,
                    ),
                ]
                # Compute one loss for each loss function in the list
                # `losses_to_test`:
                loss_channels_last = [0.0, 0.0, 0.0]
                loss_channels_first = [0.0, 0.0, 0.0]

                old_data_format = backend.image_data_format()

                # Evaluate a simple network with channels last, with all three
                # loss functions:
                backend.set_image_data_format("channels_last")
                data = np.moveaxis(data_channels_first, 1, -1)
                for index, loss_function in enumerate(losses_to_test):
                    labels = np.moveaxis(labels_channels_first[index], 1, -1)
                    inputs = input_layer.Input(shape=(3, 3, 1))
                    model = prepare_simple_model(inputs, loss_function, labels)
                    loss_channels_last[index] = model.evaluate(x=data,
                                                               y=labels,
                                                               batch_size=1,
                                                               verbose=0)

                # Evaluate the same network with channels first, with all three
                # loss functions:
                backend.set_image_data_format("channels_first")
                data = data_channels_first
                for index, loss_function in enumerate(losses_to_test):
                    labels = labels_channels_first[index]
                    inputs = input_layer.Input(shape=(1, 3, 3))
                    model = prepare_simple_model(inputs, loss_function, labels)
                    loss_channels_first[index] = model.evaluate(x=data,
                                                                y=labels,
                                                                batch_size=1,
                                                                verbose=0)

                backend.set_image_data_format(old_data_format)

                np.testing.assert_allclose(
                    loss_channels_first,
                    loss_channels_last,
                    rtol=1e-06,
                    err_msg="{}{}".format(
                        "Computed different losses for ",
                        "channels_first and channels_last",
                    ),
                )
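The `axis` argument is what makes these losses work in either data format: it tells the crossentropy which dimension holds the class scores. A minimal sketch with channels_first predictions and the 4-class sparse labels from above:

import tensorflow as tf

# Predictions in channels_first layout: (batch, classes, height, width).
y_pred = tf.nn.softmax(tf.random.uniform((1, 4, 3, 3)), axis=1)
# Sparse integer labels: (batch, height, width).
y_true = tf.constant([[[0, 1, 3], [2, 1, 0], [2, 2, 1]]], dtype=tf.float32)
loss = tf.keras.backend.sparse_categorical_crossentropy(
    y_true, y_pred, axis=1)  # axis=1 points at the class dimension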