Example #1
def get_model_and_train_step():
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    # Two symbolic losses on the same model: one from a Loss instance,
    # one from the functional form wrapped in a mean reduction.
    model.add_loss(MAE()(targets, outputs))
    model.add_loss(math_ops.reduce_mean(mae(targets, outputs)))
    return get_ctl_train_step(model)
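The get_ctl_train_step helper is not shown in this example. A minimal sketch of what such a custom-training-loop step builder could look like, assuming plain SGD and the TensorFlow-internal module aliases used above (the helper body is an assumption, not the original test code):

def get_ctl_train_step(model):
    # Assumed sketch: one optimizer step over the sum of model.losses,
    # which collects everything registered via add_loss().
    optimizer = optimizer_v2.gradient_descent.SGD(0.05)

    def train_step(x, y):
        with backprop.GradientTape() as tape:
            model([x, y], training=True)
            loss = math_ops.add_n(model.losses)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        return loss

    return train_step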
Example #2
def test_invalid_constant_input(self):
    inputs = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model(inputs, outputs)
    with self.assertRaisesRegex(
            ValueError,
            'Expected a symbolic Tensors or a callable for the loss value'):
        # A plain Python constant is rejected.
        model.add_loss(1.)

def test_invalid_variable_input(self):
    with context.eager_mode():
        inputs = Input(shape=(1,))
        outputs = testing_utils.Bias()(inputs)
        model = Model(inputs, outputs)
        with self.assertRaisesRegex(
                ValueError,
                'Expected a symbolic Tensors or a callable for the loss value'):
            # A bare variable is rejected as well.
            model.add_loss(model.weights[0])
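Both rejected inputs have the same remedy: the full TensorFlow error message asks for the computation to be wrapped in a zero-argument lambda, so it can be re-evaluated on every training step. A sketch of the valid form (the weight-decay term here is only an illustration, not part of the original test):

# Valid: a zero-argument callable is re-evaluated on each training step.
model.add_loss(lambda: 0.01 * math_ops.reduce_sum(
    math_ops.square(model.weights[0])))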
Example #4
def get_model_and_train_step():
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)

    def callable_loss():
        # Re-evaluated on every step, so it always sees the current weights.
        return math_ops.reduce_sum(model.weights)

    model.add_loss(callable_loss)
    return get_ctl_train_step(model)
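Unlike the symbolic-tensor losses in Example #1, the callable here defers evaluation: add_loss stores the function and invokes it on every training step, which is why it may reference model.weights directly even though the weights change as training proceeds. The get_ctl_train_step helper sketched under Example #1 applies here as well.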
Example #5
def test_add_entropy_loss_on_functional_model(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(losses.binary_crossentropy(targets, outputs))
    model.compile('sgd', run_eagerly=testing_utils.should_run_eagerly())
    with test.mock.patch.object(logging, 'warning') as mock_log:
        model.fit([self.x, self.y], batch_size=3, epochs=5)
        self.assertNotIn('Gradients do not exist for variables',
                         str(mock_log.call_args))

def test_loss_on_model_fit(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(MAE()(targets, outputs))
    model.add_loss(math_ops.reduce_mean(mae(targets, outputs)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.05),
        run_eagerly=testing_utils.should_run_eagerly())

    history = model.fit([self.x, self.y], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3)

def test_loss_with_sample_weight_on_model_fit(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    sw = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets, sw], outputs)
    model.add_loss(MAE()(targets, outputs, sw))
    model.add_loss(3 * math_ops.reduce_mean(sw * mae(targets, outputs)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.025),
        run_eagerly=testing_utils.should_run_eagerly())

    history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'], [4., 3.6, 3.2, 2.8, 2.4], 1e-3)
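These tests depend on fixtures (self.x, self.y, self.w) defined elsewhere in the test class. With Bias() starting at zero, inputs of ones and targets of zeros make each MAE term start at 1, which matches the reported initial losses of 2.0 and 4.0. A setUp consistent with that, assuming numpy imported as np (the exact shapes are an assumption):

def setUp(self):
    # Assumed fixtures: output = x + bias starts at 1.0, targets are 0.0,
    # so each mean-absolute-error term starts at exactly 1.0.
    self.x = np.ones((10, 1))
    self.y = np.zeros((10, 1))
    self.w = np.ones((10, 1))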
Example #8
def make_variational_autoencoder(original_dim,
                                 intermediate_dim=512,
                                 latent_dim=2,
                                 reconstruction_loss_fn=binary_crossentropy):
    # VAE model = encoder + decoder
    # build encoder model
    inputs = Input(shape=(original_dim,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu')(inputs)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,),
               name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, z, name='encoder')

    # build decoder model
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim, activation='relu')(latent_inputs)
    outputs = Dense(original_dim, activation='sigmoid')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')

    # instantiate VAE model
    outputs = decoder(encoder(inputs))
    vae = Model(inputs, outputs, name='vae_mlp')

    reconstruction_loss = reconstruction_loss_fn(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')

    return vae, encoder
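The Lambda layer above calls a sampling helper that is not included in the snippet. In the standard Keras VAE example this implements the reparameterization trick; a sketch under that assumption:

def sampling(args):
    # Reparameterization trick: z = mean + std * eps with eps ~ N(0, I),
    # so gradients can flow through z_mean and z_log_var.
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon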
Example #9
class TransformerSeq2SeqModel(AbstractSeq2SeqModel, TFBasedModel):
    custom_objects = {"ExtractLastTokenLayer": ExtractLastTokenLayer}

    def _load_config(self, config):
        super()._load_config(config)
        self.last_token_model = None
        self.max_len = self.task_config["max_len"]
        self.keep_token_file = self.task_config.get("keep_token_file")
        self.keep_token_ids = None
        self.extra_tokens = []
        self.extra_token_ids = []
        self.extra_token_file = self.task_config.get("extra_token_file")
        if self.extra_token_file:
            self.extra_tokens = load_lines(self.extra_token_file)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    transformer_kwargs=None,
                    h5_file=None,
                    **kwargs):
        # Avoid a mutable default argument.
        transformer_kwargs = transformer_kwargs or {}
        with self.get_scope():
            if self.keep_token_ids is not None:
                transformer_kwargs.update(keep_tokens=self.keep_token_ids)

            self.nn_model = get_unilm_model(
                pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                transformer_kwargs=transformer_kwargs,
                h5_file=h5_file)
        logger.info("nn model's summary:")
        self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    def compile_model(self, optimizer_name: str, optimizer_args: str,
                      **kwargs):
        logger.info(f"compiling model with optimizer_name: {optimizer_name}, "
                    f"optimizer_args: {optimizer_args}")

        with self.get_scope():
            input_ids, segment_ids = self.nn_model.inputs[:2]
            prob_vec = self.nn_model(self.nn_model.inputs)
            self.train_model = Model(inputs=self.nn_model.inputs,
                                     outputs=prob_vec)

        target_token_ids = Lambda(lambda x: x[:, 1:],
                                  name="target_tokens")(input_ids)
        prob_vec = Lambda(lambda x: x[:, :-1], name="prob_vec")(prob_vec)
        loss_mask = Lambda(lambda x: x[:, 1:], name="loss_mask")(segment_ids)

        loss_layer = build_classify_loss_layer(multi_label=False,
                                               with_mask=True)
        loss = loss_layer([target_token_ids, prob_vec, loss_mask])
        self.train_model.add_loss(loss)

        accuracy_func = masked_sparse_categorical_accuracy
        metric_layer = MetricLayer(accuracy_func, name="metric_layer")

        accuracy = metric_layer([target_token_ids, prob_vec, loss_mask])

        self.train_model.add_metric(accuracy,
                                    aggregation="mean",
                                    name="accuracy")
        optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
        self.train_model.compile(optimizer=optimizer)

        logger.info("training model's summary:")
        self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)
        self._build_gen_model()

    def _build_gen_model(self):

        with self.get_scope():
            token_lens = Input(shape=(), dtype=tf.int32, name='token_len')
            inputs = self.nn_model.inputs + [token_lens]
            prob_vec = self.nn_model.output
            extract_last_token_layer = ExtractLastTokenLayer(
                name="extract_last_token_layer")
            last_prob = extract_last_token_layer([prob_vec, token_lens])

            self.gen_model = Model(inputs=inputs,
                                   outputs=last_prob,
                                   name="last_token_model")
        logger.info("gen model's summary:")
        self.gen_model.summary(print_fn=logger.info)
        self._update_model_dict("gen", self.gen_model)
        return self.gen_model

    def example2feature(self, example: UnionSeq2SeqExample) -> Dict:
        src_text = (example.text + example.extra_text) if example.extra_text else example.text
        tgt_txt = example.tgt_text.text if isinstance(
            example, LabeledSeq2SeqExample) else None
        feature = self.tokenizer.do_tokenize(text=src_text, extra_text=tgt_txt)
        origin_token_len = len(feature["segment_ids"]) - sum(
            feature["segment_ids"])
        feature.update(origin_token_len=origin_token_len)
        return feature

    def _feature2records(self,
                         idx,
                         feature: Dict,
                         mode: str,
                         only_copy=False) -> List[Dict]:
        record = dict(idx=idx, **feature)
        if mode == "gen":
            record.update(score=0.)
            origin_token_len = record["origin_token_len"]
            record["token_ids"] = record["token_ids"][:origin_token_len]
            record["tokens"] = record["tokens"][:origin_token_len]
            record["segment_ids"] = record["segment_ids"][:origin_token_len]
        record.update(token_len=len(record["tokens"]))
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        return [record]

    def _gen_predict(self, records, topk, batch_size, only_copy=False):
        dataset_type, dataset_shape = self.get_dataset_info(mode="gen")
        test_batches = records2batches(records, dataset_shape, batch_size)

        logger.info("predicting with tf model...")
        pred_tensor_data = []
        for batch in test_batches:
            pred_batch = self.gen_model(batch, training=False)
            pred_tensor_data.extend(pred_batch)

        topk_pred_data = tf.math.top_k(pred_tensor_data, topk)
        topk_prob_data = topk_pred_data.values.numpy().tolist()
        topk_id_data = topk_pred_data.indices.numpy().tolist()

        preds = []
        for token_ids, probs in zip(topk_id_data, topk_prob_data):
            pred = [(i, self.tokenizer.id2token(i), p)
                    for i, p in zip(token_ids, probs)]
            preds.append(pred)
        return preds

    @discard_kwarg
    @log_cost_time
    def _post_predict(self,
                      features,
                      pred_tensors,
                      show_detail=False,
                      threshold=.5) -> List[List[GenText]]:
        def _tensor2output(feature, pred_tensor) -> List[str]:
            token_len = len(feature["tokens"])
            origin_token_len = feature["origin_token_len"]
            pred_tensor = pred_tensor[origin_token_len - 1:token_len - 2]
            pred_token_ids = tf.argmax(pred_tensor, axis=-1).numpy().tolist()
            pred_tokens = [self.tokenizer.id2token(e) for e in pred_token_ids]
            if show_detail:
                logger.info("pred tokens:")
                logger.info(list(zip(pred_token_ids, pred_tokens)))
            # text = self.tokenizer.decode(pred_tokens)
            text = "".join(pred_tokens)
            return [GenText(text=text)]

        preds = [
            _tensor2output(feature, tensor)
            for feature, tensor in zip(features, pred_tensors)
        ]
        return preds

    def _predict(self,
                 data_manager: DataManager,
                 batch_size,
                 mode="test",
                 gen_kwargs=None,
                 max_pred_num=None,
                 tf_serving_url=None,
                 show_detail=False,
                 **kwargs) -> List[List[GenText]]:
        # Avoid a mutable default argument.
        gen_kwargs = gen_kwargs or {}

        if mode == "test":
            return super()._predict(data_manager=data_manager,
                                    batch_size=batch_size,
                                    show_detail=show_detail,
                                    max_pred_num=max_pred_num,
                                    tf_serving_url=tf_serving_url,
                                    **kwargs)
        if mode == "gen":
            beam_searcher = BeamSearcher(pred_func=self._gen_predict)
            records = data_manager.get_records(mode=mode,
                                               return_generator=False,
                                               max_num=max_pred_num)
            preds = beam_searcher.run(records=records,
                                      batch_size=batch_size,
                                      show_detail=show_detail,
                                      **gen_kwargs)
            assert len(preds) == len(records)
            preds = [[
                GenText(text="".join(
                    item["tokens"][item["origin_token_len"]:-1]),
                        prob=math.exp(item["score"])) for item in pred
            ] for pred in preds]
            return preds
        raise ValueError(f"invalid mode: {mode}")
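In compile_model above, build_classify_loss_layer produces a loss over [target_token_ids, prob_vec, loss_mask] that is attached with add_loss. The library's implementation is not shown; a hypothetical layer with the same call signature might compute a masked sparse categorical cross-entropy like this (the class name and details are assumptions):

import tensorflow as tf

# Hypothetical stand-in for build_classify_loss_layer(multi_label=False,
# with_mask=True): averages token-level cross-entropy over unmasked positions.
class MaskedSparseCELossLayer(tf.keras.layers.Layer):
    def call(self, inputs):
        target_ids, prob_vec, mask = inputs
        ce = tf.keras.losses.sparse_categorical_crossentropy(
            target_ids, prob_vec)  # shape (batch, seq_len - 1)
        mask = tf.cast(mask, ce.dtype)
        return tf.reduce_sum(ce * mask) / (tf.reduce_sum(mask) + 1e-8)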
Example #10
    args = parser.parse_args()
    models = (encoder, decoder)
    data = (x_test, y_test)

    # VAE loss = mse_loss or xent_loss + kl_loss
    if args.mse:
        reconstruction_loss = mse(inputs, outputs)
    else:
        reconstruction_loss = binary_crossentropy(inputs, outputs)

    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()
    plot_model(vae, to_file='vae_mlp.png', show_shapes=True)

    if args.weights:
        vae.load_weights(args.weights)
    else:
        # train the autoencoder
        vae.fit(x_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(x_test, None))
        vae.save_weights('vae_mlp_mnist.h5')

    plot_results(models, data, batch_size=batch_size, model_name="vae_mlp")
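This snippet begins after argument parsing; the parser itself is not shown. Flags consistent with the uses of args.mse and args.weights above might be defined like this (an assumed reconstruction, not the original script's code):

import argparse

# Assumed parser definition matching args.weights and args.mse above.
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--weights',
                    help='path to pre-trained model weights (.h5)')
parser.add_argument('-m', '--mse', action='store_true',
                    help='use MSE reconstruction loss instead of '
                         'binary cross-entropy')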