def get_model_and_train_step():
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(MAE()(targets, outputs))
    model.add_loss(math_ops.reduce_mean(mae(targets, outputs)))
    return get_ctl_train_step(model)
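# get_ctl_train_step is referenced above but not shown in this snippet. A
# minimal sketch of what such a custom-training-loop ("CTL") step helper
# could look like, assuming the add_loss terms are exposed through
# model.losses (an illustration, not the actual helper used by these tests):
from tensorflow.python.eager import backprop


def get_ctl_train_step(model):
    optimizer = optimizer_v2.gradient_descent.SGD(0.05)

    def train_step(x, y):
        with backprop.GradientTape() as tape:
            # calling the model re-evaluates the symbolic/callable losses
            # registered via add_loss; they are collected in model.losses
            model([x, y], training=True)
            loss = math_ops.add_n(model.losses)
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        return loss

    return train_step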
def test_invalid_constant_input(self):
    inputs = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model(inputs, outputs)
    with self.assertRaisesRegex(
            ValueError,
            'Expected a symbolic Tensors or a callable for the loss value'):
        model.add_loss(1.)
def test_invalid_variable_input(self):
    with context.eager_mode():
        inputs = Input(shape=(1,))
        outputs = testing_utils.Bias()(inputs)
        model = Model(inputs, outputs)
        with self.assertRaisesRegex(
                ValueError,
                'Expected a symbolic Tensors or a callable for the loss value'):
            model.add_loss(model.weights[0])
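# For contrast with the two invalid cases above: a plain float or a variable
# is rejected, but a variable-dependent penalty can be registered as a
# zero-argument callable, which is re-evaluated on every forward pass. A
# minimal sketch (the test name and the 0.01 factor are made up for
# illustration):
def test_callable_variable_loss_sketch(self):
    inputs = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model(inputs, outputs)
    # wrap the weight in a lambda so add_loss receives a callable rather than
    # a variable or a fixed tensor
    model.add_loss(
        lambda: 0.01 * math_ops.reduce_sum(math_ops.square(model.weights[0])))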
def get_model_and_train_step():
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)

    def callable_loss():
        return math_ops.reduce_sum(model.weights)

    model.add_loss(callable_loss)
    return get_ctl_train_step(model)
def test_add_entropy_loss_on_functional_model(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(losses.binary_crossentropy(targets, outputs))
    model.compile('sgd', run_eagerly=testing_utils.should_run_eagerly())
    with test.mock.patch.object(logging, 'warning') as mock_log:
        model.fit([self.x, self.y], batch_size=3, epochs=5)
        self.assertNotIn('Gradients do not exist for variables',
                         str(mock_log.call_args))
def test_loss_on_model_fit(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets], outputs)
    model.add_loss(MAE()(targets, outputs))
    model.add_loss(math_ops.reduce_mean(mae(targets, outputs)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.05),
        run_eagerly=testing_utils.should_run_eagerly())
    history = model.fit([self.x, self.y], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'],
                        [2., 1.8, 1.6, 1.4, 1.2], 1e-3)
def test_loss_with_sample_weight_on_model_fit(self):
    inputs = Input(shape=(1,))
    targets = Input(shape=(1,))
    sw = Input(shape=(1,))
    outputs = testing_utils.Bias()(inputs)
    model = Model([inputs, targets, sw], outputs)
    model.add_loss(MAE()(targets, outputs, sw))
    model.add_loss(3 * math_ops.reduce_mean(sw * mae(targets, outputs)))
    model.compile(
        optimizer_v2.gradient_descent.SGD(0.025),
        run_eagerly=testing_utils.should_run_eagerly())
    history = model.fit([self.x, self.y, self.w], batch_size=3, epochs=5)
    self.assertAllClose(history.history['loss'],
                        [4., 3.6, 3.2, 2.8, 2.4], 1e-3)
def make_variational_autoencoder(original_dim,
                                 intermediate_dim=512,
                                 latent_dim=2,
                                 reconstruction_loss_fn=binary_crossentropy):
    # VAE model = encoder + decoder
    # build encoder model
    inputs = Input(shape=(original_dim,), name='encoder_input')
    x = Dense(intermediate_dim, activation='relu')(inputs)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,),
               name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, z, name='encoder')

    # build decoder model
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dim, activation='relu')(latent_inputs)
    outputs = Dense(original_dim, activation='sigmoid')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')

    # instantiate VAE model
    outputs = decoder(encoder(inputs))
    vae = Model(inputs, outputs, name='vae_mlp')

    reconstruction_loss = reconstruction_loss_fn(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    return vae, encoder
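# The Lambda layer above references a `sampling` function that is not defined
# in this snippet. A minimal sketch of the reparameterization-trick helper it
# is assumed to be (z = z_mean + exp(0.5 * z_log_var) * epsilon with
# epsilon ~ N(0, I)):
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon


# Possible usage (assuming flattened 28x28 images in x_train):
# vae, encoder = make_variational_autoencoder(original_dim=784)
# vae.fit(x_train, epochs=50, batch_size=128)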
class TransformerSeq2SeqModel(AbstractSeq2SeqModel, TFBasedModel):
    custom_objects = {"ExtractLastTokenLayer": ExtractLastTokenLayer}

    def _load_config(self, config):
        super()._load_config(config)
        self.last_token_model = None
        self.max_len = self.task_config["max_len"]
        self.keep_token_file = self.task_config.get("keep_token_file")
        self.keep_token_ids = None
        self.extra_tokens = []
        self.extra_token_ids = []
        self.extra_token_file = self.task_config.get("extra_token_file")
        if self.extra_token_file:
            self.extra_tokens = load_lines(self.extra_token_file)

    def build_model(self,
                    pretrained_model_path=None,
                    pretrained_model_tag="bert",
                    transformer_kwargs={},
                    h5_file=None,
                    **kwargs):
        with self.get_scope():
            if self.keep_token_ids is not None:
                transformer_kwargs.update(keep_tokens=self.keep_token_ids)
            self.nn_model = get_unilm_model(
                pretrained_model_path,
                pretrained_model_tag=pretrained_model_tag,
                transformer_kwargs=transformer_kwargs,
                h5_file=h5_file)
            logger.info("nn model's summary:")
            self.nn_model.summary(print_fn=logger.info)
        self._update_model_dict("test", self.nn_model)
        return self.nn_model

    def compile_model(self, optimizer_name: str, optimizer_args: str, **kwargs):
        logger.info(
            f"compile model with optimizer_name:{optimizer_name}, "
            f"optimizer_args:{optimizer_args}")
        with self.get_scope():
            input_ids, segment_ids = self.nn_model.inputs[:2]
            prob_vec = self.nn_model(self.nn_model.inputs)
            self.train_model = Model(inputs=self.nn_model.inputs,
                                     outputs=prob_vec)

            # shift targets and predictions so that position t predicts token t + 1,
            # and mask the loss to the target segment
            target_token_ids = Lambda(lambda x: x[:, 1:],
                                      name="target_tokens")(input_ids)
            prob_vec = Lambda(lambda x: x[:, :-1], name="prob_vec")(prob_vec)
            loss_mask = Lambda(lambda x: x[:, 1:], name="loss_mask")(segment_ids)

            loss_layer = build_classify_loss_layer(multi_label=False,
                                                   with_mask=True)
            loss = loss_layer([target_token_ids, prob_vec, loss_mask])
            self.train_model.add_loss(loss)

            accuracy_func = masked_sparse_categorical_accuracy
            metric_layer = MetricLayer(accuracy_func, name="metric_layer")
            accuracy = metric_layer([target_token_ids, prob_vec, loss_mask])
            self.train_model.add_metric(accuracy,
                                        aggregation="mean",
                                        name="accuracy")

            optimizer = OptimizerFactory.create(optimizer_name, optimizer_args)
            self.train_model.compile(optimizer=optimizer)
            logger.info("training model's summary:")
            self.train_model.summary(print_fn=logger.info)
        self._update_model_dict("train", self.train_model)
        self._build_gen_model()

    def _build_gen_model(self):
        with self.get_scope():
            token_lens = Input(shape=(), dtype=tf.int32, name='token_len')
            inputs = self.nn_model.inputs + [token_lens]
            prob_vec = self.nn_model.output
            extract_last_token_layer = ExtractLastTokenLayer(
                name="extract_last_token_layer")
            last_prob = extract_last_token_layer([prob_vec, token_lens])
            self.gen_model = Model(inputs=inputs,
                                   outputs=last_prob,
                                   name="last_token_model")
            logger.info("gen model's summary:")
            self.gen_model.summary(print_fn=logger.info)
        self._update_model_dict("gen", self.gen_model)
        return self.gen_model

    def example2feature(self, example: UnionSeq2SeqExample) -> Dict:
        src_text = (example.text + example.extra_text
                    if example.extra_text else example.text)
        tgt_txt = (example.tgt_text.text
                   if isinstance(example, LabeledSeq2SeqExample) else None)
        feature = self.tokenizer.do_tokenize(text=src_text, extra_text=tgt_txt)
        origin_token_len = (len(feature["segment_ids"]) -
                            sum(feature["segment_ids"]))
        feature.update(origin_token_len=origin_token_len)
        return feature

    def _feature2records(self, idx, feature: Dict, mode: str,
                         only_copy=False) -> List[Dict]:
        record = dict(idx=idx, **feature)
        if mode == "gen":
            record.update(score=0.)
            origin_token_len = record["origin_token_len"]
            record["token_ids"] = record["token_ids"][:origin_token_len]
            record["tokens"] = record["tokens"][:origin_token_len]
            record["segment_ids"] = record["segment_ids"][:origin_token_len]
        record.update(token_len=len(record["tokens"]))
        truncate_record(record=record,
                        max_len=self.max_len,
                        keys=["token_ids", "segment_ids", "tokens"])
        return [record]

    def _gen_predict(self, records, topk, batch_size, only_copy=False):
        dataset_type, dataset_shape = self.get_dataset_info(mode="gen")
        test_batches = records2batches(records, dataset_shape, batch_size)
        logger.info("predicting with tf model...")
        pred_tensor_data = []
        for batch in test_batches:
            pred_batch = self.gen_model(batch, training=False)
            pred_tensor_data.extend(pred_batch)
        topk_pred_data = tf.math.top_k(pred_tensor_data, topk)
        topk_prob_data = topk_pred_data.values.numpy().tolist()
        topk_id_data = topk_pred_data.indices.numpy().tolist()
        preds = []
        for token_ids, probs in zip(topk_id_data, topk_prob_data):
            pred = [(i, self.tokenizer.id2token(i), p)
                    for i, p in zip(token_ids, probs)]
            preds.append(pred)
        return preds

    @discard_kwarg
    @log_cost_time
    def _post_predict(self, features, pred_tensors, show_detail=False,
                      threshold=.5) -> List[List[GenText]]:

        def _tensor2output(feature, pred_tensor) -> List[str]:
            token_len = len(feature["tokens"])
            origin_token_len = feature["origin_token_len"]
            pred_tensor = pred_tensor[origin_token_len - 1:token_len - 2]
            pred_token_ids = tf.argmax(pred_tensor, axis=-1).numpy().tolist()
            pred_tokens = [self.tokenizer.id2token(e) for e in pred_token_ids]
            if show_detail:
                logger.info("pred tokens:")
                logger.info(list(zip(pred_token_ids, pred_tokens)))
            # text = self.tokenizer.decode(pred_tokens)
            text = "".join(pred_tokens)
            return [GenText(text=text)]

        preds = [
            _tensor2output(feature, tensor)
            for feature, tensor in zip(features, pred_tensors)
        ]
        return preds

    def _predict(self,
                 data_manager: DataManager,
                 batch_size,
                 mode="test",
                 gen_kwargs={},
                 max_pred_num=None,
                 tf_serving_url=None,
                 show_detail=False,
                 **kwargs) -> List[List[GenText]]:
        if mode == "test":
            return super()._predict(data_manager=data_manager,
                                    batch_size=batch_size,
                                    show_detail=show_detail,
                                    max_pred_num=max_pred_num,
                                    tf_serving_url=tf_serving_url,
                                    **kwargs)

        if mode != "gen":
            # teacher-forced prediction: run the full-sequence model and
            # decode the target span with argmax
            dataset = self._pre_process(data_manager=data_manager,
                                        batch_size=batch_size,
                                        mode=mode,
                                        max_num=max_pred_num)
            pred_tensors = self._model_predict(dataset=dataset,
                                               model=self.nn_model,
                                               tf_serving_url=tf_serving_url,
                                               show_detail=show_detail)
            features = data_manager.get_features(max_num=max_pred_num)
            preds = self._post_predict(features=features,
                                       pred_tensors=pred_tensors,
                                       show_detail=show_detail)
            return preds

        if mode == "gen":
            # autoregressive decoding with beam search over last-token probabilities
            beam_searcher = BeamSearcher(pred_func=self._gen_predict)
            records = data_manager.get_records(mode=mode,
                                               return_generator=False,
                                               max_num=max_pred_num)
            preds = beam_searcher.run(records=records,
                                      batch_size=batch_size,
                                      show_detail=show_detail,
                                      **gen_kwargs)
            assert len(preds) == len(records)
            preds = [[
                GenText(text="".join(
                    item["tokens"][item["origin_token_len"]:-1]),
                        prob=math.exp(item["score"]))
                for item in pred
            ] for pred in preds]
            return preds

        raise ValueError(f"invalid mode:{mode}")
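# build_classify_loss_layer and MetricLayer are project-specific helpers that
# are not shown here. A minimal sketch of the kind of masked loss layer
# compile_model is assumed to rely on (class name and smoothing epsilon are
# assumptions, not the project's actual implementation):
import tensorflow as tf
from tensorflow.keras.layers import Layer


class MaskedSparseCrossEntropyLayer(Layer):
    """Token-level sparse CE averaged over positions where the mask is 1."""

    def call(self, inputs):
        target_ids, prob_vec, mask = inputs
        mask = tf.cast(mask, prob_vec.dtype)
        # per-position cross entropy against the predicted probability vectors
        ce = tf.keras.losses.sparse_categorical_crossentropy(
            target_ids, prob_vec)
        return tf.reduce_sum(ce * mask) / (tf.reduce_sum(mask) + 1e-8)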
args = parser.parse_args()
models = (encoder, decoder)
data = (x_test, y_test)

# VAE loss = mse_loss or xent_loss + kl_loss
if args.mse:
    reconstruction_loss = mse(inputs, outputs)
else:
    reconstruction_loss = binary_crossentropy(inputs, outputs)

reconstruction_loss *= original_dim
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()
plot_model(vae, to_file='vae_mlp.png', show_shapes=True)

if args.weights:
    vae.load_weights(args.weights)
else:
    # train the autoencoder
    vae.fit(x_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_test, None))
    vae.save_weights('vae_mlp_mnist.h5')

plot_results(models, data, batch_size=batch_size, model_name="vae_mlp")
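# plot_results is used above but not defined in this snippet. A rough sketch
# of what it is assumed to do with the (encoder, decoder) pair from this
# script: project the test set into the 2-D latent space and scatter it,
# coloured by digit label (the decoder could additionally be used to render a
# grid of generated digits):
import matplotlib.pyplot as plt


def plot_results(models, data, batch_size=128, model_name="vae_mlp"):
    encoder, _decoder = models
    x_test, y_test = data
    # encode the test images to their latent representation
    z = encoder.predict(x_test, batch_size=batch_size)
    plt.figure(figsize=(8, 6))
    plt.scatter(z[:, 0], z[:, 1], c=y_test, s=2)
    plt.colorbar()
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.savefig(f"{model_name}_latent.png")
    plt.show()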