Example #1
    def test_multi_gpu_float16_no_masking_no_dropout_noise_shape_no_sample_weight_mode(
            self):
        dtype = self._enable_float16()

        batch_generator = FakeChatGenerator(
            num_unique_symbols=self.num_unique_symbols,
            max_seq_length=self.max_seq_length,
            batch_size=self.batch_size,
            return_sample_weights=True,
            dtype=dtype)

        model_generator = Seq2SeqWithSubmodels(
            TestIssuesTFKeras.KERAS_CLASSES,
            self.max_seq_length,
            self.num_unique_symbols,
            use_masking=False,
            disable_dropout_noise_shape=True,
            dtype=dtype)

        def create_optimizer():
            # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
            optimizer = Adam(learning_rate=1e-4)
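            # The graph-rewrite API below (added in TF 1.14) wraps the
            # optimizer in a loss-scaling optimizer and enables the automatic
            # mixed-precision graph pass; later TF 2 releases deprecate it in
            # favor of the tf.keras.mixed_precision API.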
            return tf.train.experimental.enable_mixed_precision_graph_rewrite(
                optimizer)

        mirrored_strategy = tf.distribute.MirroredStrategy()
        with mirrored_strategy.scope():
            models = model_generator.stamp_train_model()
            model = models["train_model"]

            self._compile_model(model,
                                create_optimizer,
                                sample_weight_mode=False)

        # TODO: shapes are only correct when using one-hot encoding
        dataset = tf.data.Dataset.from_generator(
            batch_generator,
            (
                {
                    'chat-input': dtype,
                    'teacher-forcing-input': dtype
                },
                dtype,  # outputs
                dtype),  # sample weights
            ({
                'chat-input': (None, None, None),
                'teacher-forcing-input': (None, None, None)
            }, (None, None, None), (None, None)))

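        # Note: max_queue_size and workers apply only to Python generator or
        # keras.utils.Sequence inputs; Keras ignores them for a tf.data.Dataset.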
        model.fit(dataset,
                  steps_per_epoch=len(batch_generator),
                  epochs=5,
                  verbose=1,
                  max_queue_size=10,
                  workers=3)
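
For reference, a minimal sketch of a generator compatible with the from_generator call above. fake_batches and its default shapes are hypothetical stand-ins inferred from the declared output types/shapes, not the actual FakeChatGenerator:

import numpy as np

def fake_batches(batch_size=8, seq_len=12, num_symbols=50, dtype=np.float16):
    # Yields the (inputs_dict, targets, sample_weights) triple declared in
    # from_generator: one-hot features of shape (batch, time, symbols) and
    # per-timestep sample weights of shape (batch, time).
    while True:
        one_hot = np.zeros((batch_size, seq_len, num_symbols), dtype=dtype)
        yield ({'chat-input': one_hot, 'teacher-forcing-input': one_hot},
               one_hot,                                       # targets
               np.ones((batch_size, seq_len), dtype=dtype))   # sample weights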
Example #2
    def test_multi_gpu_float32_masking_use_partially_known_dropout_noise_shape_sample_weight_mode(
            self):
        dtype = self._enable_float32()

        batch_generator = FakeChatGenerator(
            num_unique_symbols=self.num_unique_symbols,
            max_seq_length=self.max_seq_length,
            batch_size=self.batch_size,
            return_sample_weights=True,
            dtype=dtype)

        model_generator = Seq2SeqWithSubmodels(
            TestIssuesTFKeras.KERAS_CLASSES,
            self.max_seq_length,
            self.num_unique_symbols,
            use_masking=True,
            use_partially_known_dropout_noise_shape=True,
            dtype=dtype)

        mirrored_strategy = tf.distribute.MirroredStrategy()
        with mirrored_strategy.scope():
            models = model_generator.stamp_train_model()
            model = models["train_model"]

            self._compile_model(model,
                                lambda: Adam(learning_rate=1e-4),
                                sample_weight_mode=True)

        # TODO: shapes are only correct when using one-hot encoding
        dataset = tf.data.Dataset.from_generator(
            batch_generator,
            (
                {
                    'chat-input': dtype,
                    'teacher-forcing-input': dtype
                },
                dtype,  # outputs
                dtype),  # sample weights
            ({
                'chat-input': (None, None, self.num_unique_symbols),
                'teacher-forcing-input': (None, None, self.num_unique_symbols)
            }, (None, None, self.num_unique_symbols), (None, None)))

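        # Note: max_queue_size and workers apply only to Python generator or
        # keras.utils.Sequence inputs; Keras ignores them for a tf.data.Dataset.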
        model.fit(dataset,
                  steps_per_epoch=len(batch_generator),
                  epochs=5,
                  verbose=1,
                  max_queue_size=10,
                  workers=3)
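
With sample_weight_mode=True, the compile step presumably maps to Keras' "temporal" mode, which expects one weight per timestep and matches the (None, None) sample-weight shape declared in the dataset. A minimal sketch of what such a _compile_model helper might look like; the helper name and loss are assumptions, not the project's actual code:

def compile_model(model, make_optimizer, sample_weight_mode):
    # 'temporal' tells Keras to expect per-timestep sample weights of shape
    # (batch, time); None means one scalar weight per sample.
    model.compile(
        optimizer=make_optimizer(),
        loss='categorical_crossentropy',
        sample_weight_mode='temporal' if sample_weight_mode else None)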