# Class-level helper: print the hyperparameters expected by each component of this estimator.
def list_params(cls, expected_params=None):
    print("Seq2Seq Encoder:")
    Seq2SeqEncoderEstimator.list_params(expected_params)
    print()
    print("PersonaSeq2Seq Decoder:")
    PersonaSeq2SeqDecoderEstimator.list_params(expected_params)
    print()
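# Usage sketch (illustrative, not from the original file; assumes this list_params belongs
# to PersonaSeq2SeqEstimator): printing the expected hyperparameters for each component is
# a quick way to check a scoped params file before training.
#
#   PersonaSeq2SeqEstimator.list_params()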
def __init__(self, model_dir, params, config=None, scope="default"): self.core_encoder = Seq2SeqEncoderEstimator(model_dir, params, scope="core_encoder") self.ae_encoder = Seq2SeqEncoderEstimator(model_dir, params, scope="ae_encoder") self.noise = NoiseLayer(model_dir, params, scope="noise") self.decoder = Seq2SeqDecoderEstimator(model_dir, params, scope="decoder") self.core_model = EstimatorChain([self.core_encoder, self.decoder], model_dir, params, scope="core") self.noisy_core_model = EstimatorChain([self.core_encoder, self.noise, self.decoder], model_dir, params, scope="noisy_core") self.autoencoder = EstimatorChain([self.ae_encoder, self.noise, self.decoder], model_dir, params, scope="ae") self.loss_balance = [1.0, 1.0, 1.0, 1.0] super().__init__([self.core_model, self.noisy_core_model, self.autoencoder], model_dir, params, config=config, scope=scope)
def __init__(self, model_dir="/tmp", params=dict(), config=None, scope="", is_mmi_model=False): self.encoder = Seq2SeqEncoderEstimator(model_dir, params, config=config, scope=scope + "/encoder") self.decoder = Seq2SeqDecoderEstimator(model_dir, params, config=config, scope=scope + "/decoder", is_mmi_model=is_mmi_model) super().__init__([self.encoder, self.decoder], model_dir, params, config, scope)
def main(_): print("Loading parameters..") params = util.load_params(FLAGS.params_file) print("Building model..") model_dir = FLAGS.model_dir if FLAGS.clean_model_dir: util.clean_model_dir(model_dir) first_model = PersonaSeq2SeqEstimator(model_dir, params, scope="first") second_model_encoder = Seq2SeqEncoderEstimator(model_dir, params, scope="second_encoder") second_model = EstimatorChain([second_model_encoder, first_model.decoder], model_dir, params, scope="second") mmi_model = PersonaSeq2SeqEstimator(model_dir, params, scope="mmi", is_mmi_model=True) model_group = EstimatorGroup([first_model, second_model, mmi_model], model_dir, params, scope="group") print("Getting sources..") fields = { "train/inputs": "int", "train/targets": "int", "train/speakers": "int" } train_source = DataSource(FLAGS.train_file, fields) autoenc_source = DataSource(FLAGS.autoenc_file, fields) test_source = DataSource(FLAGS.test_file, fields) train_field_map = { "inputs": "train/inputs", "targets": "train/targets", "speaker_ids": "train/speakers" } autoenc_field_map = { "inputs": "train/inputs", "targets": "train/inputs", "speaker_ids": "train/speakers" } mmi_field_map = { "inputs": "train/targets", "targets": "train/inputs", "speaker_ids": "train/speakers" } paired_input_fn = train_source.get_input_fn("paired_in", train_field_map, None, FLAGS.batch_size) autoenc_input_fn = train_source.get_input_fn("autoenc_in", autoenc_field_map, None, FLAGS.batch_size) mmi_input_fn = train_source.get_input_fn("mmi_in", mmi_field_map, None, FLAGS.batch_size) train_input_fn = DataSource.group_input_fns( ["first", "second", "mmi"], [paired_input_fn, autoenc_input_fn, mmi_input_fn]) test_input_fn = test_source.get_input_fn("test_in", train_field_map, 1, FLAGS.batch_size) print("Processing models..") print("Pretraining primary model..") model_group.train(train_input_fn, first_model, steps=FLAGS.pretrain_batches) print("Multitask training..") model_group.train(train_input_fn, { "first": 1, "second": 1, "mmi": 0 }, steps=FLAGS.train_batches) print("Training MMI model..") model_group.train(train_input_fn, mmi_model, steps=FLAGS.mmi_batches) print("Evaluating..") model_group.evaluate(test_input_fn, first_model) if FLAGS.interactive: print("Interactive decoding...") vocab = Vocabulary(fname=params["vocab_file"]) decoding.cmd_decode(first_model, vocab, persona=True, mmi_component=mmi_model)
def main(_):
    '''
    This is a more complex example in which we build an Icecaps script involving
    component chaining and multi-task learning. We recommend you start with
    train_simple_example.py. In this example, we build a personalized conversation
    system that combines paired and unpaired data, and applies MMI during decoding.
    '''
    print("Loading parameters..")
    # When multiple estimators are involved, you can specify which hyperparameters in your
    # params file belong to which estimator using scoping. See
    # dummy_params/persona_mmi_example.params for an example. If no scope is specified,
    # the hyperparameter is provided to all models in your architecture.
    params = util.load_params(FLAGS.params_file)

    print("Building model..")
    model_dir = FLAGS.model_dir
    if FLAGS.clean_model_dir:
        util.clean_model_dir(model_dir)
    # For this system, we will need to build three different estimators.
    # The first estimator is a personalized seq2seq estimator that will be responsible for
    # learning the conversational model.
    first_model = PersonaSeq2SeqEstimator(model_dir, params, scope="first")
    # The second estimator is a personalized seq2seq estimator that shares its decoder with
    # the first model. This model will learn an autoencoder on an unpaired personalized
    # data set. The purpose of this configuration is to influence the first model with
    # stylistic information from the unpaired dataset.
    # To construct this second estimator, we first build a seq2seq encoder separate from
    # the first model. Then, we use an EstimatorChain to chain that encoder to the first
    # model's decoder, allowing the two models to share that decoder.
    second_model_encoder = Seq2SeqEncoderEstimator(model_dir, params, scope="second_encoder")
    second_model = EstimatorChain([second_model_encoder, first_model.decoder],
                                  model_dir, params, scope="second")
    # The third estimator is used for MMI decoding. This model will learn the inverse
    # function of the first model. During decoding, this estimator will be used to rerank
    # hypotheses generated by the first model during beam search decoding. While this
    # won't have much of an effect on our toy data sets, the purpose of this model in
    # real-world settings is to penalize generic responses applicable to many contexts,
    # such as "I don't know."
    mmi_model = PersonaSeq2SeqEstimator(model_dir, params, scope="mmi", is_mmi_model=True)
    model_group = EstimatorGroup([first_model, second_model, mmi_model],
                                 model_dir, params, scope="group")

    print("Getting sources..")
    # We will use two DataSources for training and one for testing.
    fields = {"train/inputs": "int", "train/targets": "int", "train/speakers": "int"}
    paired_source = DataSource(FLAGS.paired_file, fields)
    unpaired_source = DataSource(FLAGS.unpaired_file, fields)
    test_source = DataSource(FLAGS.test_file, fields)
    # We construct three field maps.
    # The paired field map is similar to the field map shown in train_simple_example.py.
    # The unpaired field map maps train/inputs to both the estimator's inputs and targets,
    # in order to train an autoencoder.
    # The mmi field map maps train/inputs to targets and train/targets to inputs, in
    # order to learn the inverse of the first estimator.
    paired_field_map = {"inputs": "train/inputs",
                        "targets": "train/targets",
                        "speaker_ids": "train/speakers"}
    unpaired_field_map = {"inputs": "train/inputs",
                          "targets": "train/inputs",
                          "speaker_ids": "train/speakers"}
    mmi_field_map = {"inputs": "train/targets",
                     "targets": "train/inputs",
                     "speaker_ids": "train/speakers"}
    paired_input_fn = paired_source.get_input_fn(
        "paired_in", paired_field_map, None, FLAGS.batch_size)
    unpaired_input_fn = unpaired_source.get_input_fn(
        "unpaired_in", unpaired_field_map, None, FLAGS.batch_size)
    mmi_input_fn = paired_source.get_input_fn(
        "mmi_in", mmi_field_map, None, FLAGS.batch_size)
    # For multi-task learning, you will need to group your input_fns together with group_input_fns().
    train_input_fn = DataSource.group_input_fns(
        ["first", "second", "mmi"],
        [paired_input_fn, unpaired_input_fn, mmi_input_fn])
    test_input_fn = test_source.get_input_fn("test_in", paired_field_map, 1, FLAGS.batch_size)

    print("Processing models..")
    # Icecaps supports flexible multi-task training pipelines. You can set up multiple phases,
    # where each phase trains your architecture with different weights across your objectives.
    # In this example, we will first pre-train the first model by itself, then jointly train
    # the first and second models, then finally train the MMI model by itself.
    print("Pretraining primary model..")
    model_group.train(train_input_fn, first_model, steps=FLAGS.pretrain_batches)
    print("Multitask training..")
    model_group.train(train_input_fn, {"first": 1, "second": 1, "mmi": 0},
                      steps=FLAGS.train_batches)
    print("Training MMI model..")
    model_group.train(train_input_fn, mmi_model, steps=FLAGS.mmi_batches)

    print("Evaluating..")
    model_group.evaluate(test_input_fn, first_model)

    if FLAGS.interactive:
        print("Interactive decoding...")
        vocab = Vocabulary(fname=params["vocab_file"])
        # To decode with MMI, you can pass your MMI model to cmd_decode().
        # lambda_balance controls how the first model's and the MMI model's scores are
        # weighted during decoding.
        decoding.cmd_decode(first_model, vocab, persona=True, mmi_component=mmi_model,
                            lambda_balance=FLAGS.lambda_balance)
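# Illustrative helper (an assumption about the weighting, not taken from the Icecaps source):
# a minimal sketch of how an MMI-style reranker might combine the forward and inverse model
# log-probabilities for a single beam-search hypothesis, matching the role lambda_balance
# plays in cmd_decode() above.
def mmi_rerank_score(forward_logprob, inverse_logprob, lambda_balance):
    # Higher lambda_balance leans more on the inverse (MMI) model, penalizing generic
    # responses that are likely under many different inputs.
    return (1.0 - lambda_balance) * forward_logprob + lambda_balance * inverse_logprob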