# ===== Example #1 =====
def main(_):
    """Train and evaluate a seq2seq or transformer model, then optionally
    open an interactive command-line decoding session."""
    print("Loading hyperparameters..")
    params = util.load_params(FLAGS.params_file)

    print("Building model..")
    model_dir = FLAGS.model_dir
    if FLAGS.clean_model_dir:
        util.clean_model_dir(model_dir)
    # Map the flag value to an estimator class via a dispatch table.
    estimator_classes = {
        "transformer": TransformerEstimator,
        "seq2seq": Seq2SeqEstimator,
    }
    model_cls = estimator_classes.get(FLAGS.model_cls)
    if model_cls is None:
        raise ValueError("Model class not supported.")
    model = model_cls(model_dir, params)

    print("Getting sources..")
    # TFRecord variable names and their types, shared by both sources.
    record_fields = {"train/inputs": "int", "train/targets": "int"}
    train_source = DataSource(FLAGS.train_file, record_fields)
    test_source = DataSource(FLAGS.test_file, record_fields)

    # Map TFRecord variable names onto the names the estimator expects.
    field_map = {"inputs": "train/inputs", "targets": "train/targets"}
    # None => unbounded epochs for training; 1 => a single epoch for testing.
    train_input_fn = train_source.get_input_fn("train_in", field_map, None,
                                               FLAGS.batch_size)
    test_input_fn = test_source.get_input_fn("test_in", field_map, 1,
                                             FLAGS.batch_size)

    print("Processing model..")
    model.train(train_input_fn, steps=FLAGS.train_batches)
    model.evaluate(test_input_fn)

    if FLAGS.interactive:
        print("Interactive decoding...")
        vocab = Vocabulary(fname=params["vocab_file"])
        decoding.cmd_decode(model, vocab)
# ===== Example #2 =====
def main(_):
    """Train with frequent checkpointing, select the best checkpoint on the
    test set, evaluate, and optionally start a cmd or gui decoding session."""
    print("Loading hyperparameters..")
    params = util.load_params(FLAGS.params_file)

    print("Building model..")
    # Checkpoint every 100 steps and keep all checkpoints (max=None) so that
    # choose_best_checkpoint() can pick among them after training.
    validation_config = tf.estimator.RunConfig(
        save_checkpoints_steps=100,
        keep_checkpoint_max=None,
    )
    model_dir = FLAGS.model_dir
    if FLAGS.clean_model_dir:
        util.clean_model_dir(model_dir)
    # Map the flag value to an estimator class via a dispatch table.
    estimator_classes = {
        "transformer": TransformerEstimator,
        "seq2seq": Seq2SeqEstimator,
    }
    model_cls = estimator_classes.get(FLAGS.model_cls)
    if model_cls is None:
        raise ValueError("Model class not supported.")
    model = model_cls(model_dir, params, config=validation_config)

    print("Getting sources..")
    # TFRecord variable names and their types, shared by both sources.
    record_fields = {"train/inputs": "int", "train/targets": "int"}
    train_source = DataSource(FLAGS.train_file, record_fields)
    test_source = DataSource(FLAGS.test_file, record_fields)

    # Map TFRecord variable names onto the names the estimator expects.
    field_map = {"inputs": "train/inputs", "targets": "train/targets"}
    # None => unbounded epochs for training; 1 => a single epoch for testing.
    train_input_fn = train_source.get_input_fn(
        "train_in", field_map, None, FLAGS.batch_size)
    test_input_fn = test_source.get_input_fn(
        "test_in", field_map, 1, FLAGS.batch_size)

    print("Processing model..")
    model.train(train_input_fn, steps=FLAGS.train_batches)
    model.choose_best_checkpoint(test_input_fn)
    model.evaluate(test_input_fn)

    if FLAGS.interaction != "off":
        print("Interactive decoding...")
        vocab = Vocabulary(fname=params["vocab_file"])
        if FLAGS.interaction == "cmd":
            decoding.cmd_decode(model, vocab, persona=True)
        elif FLAGS.interaction == "gui":
            decoding.gui_decode(model, vocab)
# ===== Example #3 =====
def main(_):
    """Train a personalized multi-task conversation model with MMI reranking.

    Builds three estimators — a primary persona seq2seq model, an autoencoder
    chain sharing the primary model's decoder, and an inverse (MMI) model —
    trains them in three phases, evaluates the primary model, and optionally
    opens an interactive decoding session.
    """
    print("Loading parameters..")
    params = util.load_params(FLAGS.params_file)

    print("Building model..")
    model_dir = FLAGS.model_dir
    if FLAGS.clean_model_dir:
        util.clean_model_dir(model_dir)
    first_model = PersonaSeq2SeqEstimator(model_dir, params, scope="first")
    # The second model chains a fresh encoder onto the first model's decoder,
    # so the two models share decoder weights.
    second_model_encoder = Seq2SeqEncoderEstimator(model_dir,
                                                   params,
                                                   scope="second_encoder")
    second_model = EstimatorChain([second_model_encoder, first_model.decoder],
                                  model_dir,
                                  params,
                                  scope="second")
    # The MMI model learns the inverse mapping (targets -> inputs) and is used
    # to rerank hypotheses during decoding.
    mmi_model = PersonaSeq2SeqEstimator(model_dir,
                                        params,
                                        scope="mmi",
                                        is_mmi_model=True)
    model_group = EstimatorGroup([first_model, second_model, mmi_model],
                                 model_dir,
                                 params,
                                 scope="group")

    print("Getting sources..")
    fields = {
        "train/inputs": "int",
        "train/targets": "int",
        "train/speakers": "int"
    }
    train_source = DataSource(FLAGS.train_file, fields)
    autoenc_source = DataSource(FLAGS.autoenc_file, fields)
    test_source = DataSource(FLAGS.test_file, fields)

    train_field_map = {
        "inputs": "train/inputs",
        "targets": "train/targets",
        "speaker_ids": "train/speakers"
    }
    # Autoencoder objective: targets mirror inputs.
    autoenc_field_map = {
        "inputs": "train/inputs",
        "targets": "train/inputs",
        "speaker_ids": "train/speakers"
    }
    # Inverse (MMI) objective: inputs and targets are swapped.
    mmi_field_map = {
        "inputs": "train/targets",
        "targets": "train/inputs",
        "speaker_ids": "train/speakers"
    }

    paired_input_fn = train_source.get_input_fn("paired_in", train_field_map,
                                                None, FLAGS.batch_size)
    # BUG FIX: this input_fn previously read from train_source, leaving
    # autoenc_source (built from FLAGS.autoenc_file) unused; the autoencoder
    # phase is supposed to consume the autoencoder data set.
    autoenc_input_fn = autoenc_source.get_input_fn("autoenc_in",
                                                   autoenc_field_map, None,
                                                   FLAGS.batch_size)
    mmi_input_fn = train_source.get_input_fn("mmi_in", mmi_field_map, None,
                                             FLAGS.batch_size)
    train_input_fn = DataSource.group_input_fns(
        ["first", "second", "mmi"],
        [paired_input_fn, autoenc_input_fn, mmi_input_fn])
    test_input_fn = test_source.get_input_fn("test_in", train_field_map, 1,
                                             FLAGS.batch_size)

    print("Processing models..")
    # Phase 1: pre-train the primary model alone.
    print("Pretraining primary model..")
    model_group.train(train_input_fn,
                      first_model,
                      steps=FLAGS.pretrain_batches)
    # Phase 2: jointly train the first and second models (MMI weight 0).
    print("Multitask training..")
    model_group.train(train_input_fn, {
        "first": 1,
        "second": 1,
        "mmi": 0
    },
                      steps=FLAGS.train_batches)
    # Phase 3: train the MMI model alone.
    print("Training MMI model..")
    model_group.train(train_input_fn, mmi_model, steps=FLAGS.mmi_batches)
    print("Evaluating..")
    model_group.evaluate(test_input_fn, first_model)

    if FLAGS.interactive:
        print("Interactive decoding...")
        vocab = Vocabulary(fname=params["vocab_file"])
        decoding.cmd_decode(first_model,
                            vocab,
                            persona=True,
                            mmi_component=mmi_model)
# ===== Example #4 =====
def main(_):
    '''
    A minimal example of an Icecaps training script — effectively the
    "Hello World" of Icecaps. Icecaps training scripts follow the five-phase
    pattern illustrated here: load hyperparameters, build the architecture,
    set up data sources, train/evaluate, and (optionally) decode
    interactively. We train a basic model on the paired data stored in
    dummy_data/paired_personalized.tfrecord. See data_processing_example.py
    for how to build TFRecords from text data files.
    '''

    print("Loading hyperparameters..")
    # Phase 1: load hyperparameters from a .params file — a simple
    # colon-delimited format (e.g. dummy_params/simple_example_seq2seq.params).
    params = util.load_params(FLAGS.params_file)

    print("Building model..")
    # Phase 2: build the architecture from the loaded hyperparameters. Here it
    # is a single LSTM-based seq2seq model; train_persona_mmi_example.py shows
    # more complex architectures.
    model_dir = FLAGS.model_dir
    if FLAGS.clean_model_dir:
        util.clean_model_dir(model_dir)
    model_cls = Seq2SeqEstimator

    # Each estimator expects its own set of hyperparameters. With
    # use_default_params set to True in the .params file, defaults fill in any
    # unspecified values; run the class method list_params() (e.g.
    # Seq2SeqEstimator.list_params() in a Python session) to see the expected
    # hyperparameters and their defaults.
    model = model_cls(model_dir, params)

    print("Getting sources..")
    # Phase 3: set up DataSource objects, which build input_fns that feed data
    # from TFRecord files into the training pipeline. This example uses one
    # source for training and one for testing.

    # TFRecords store named variables per data point; the fields dict tells
    # the DataSource which variables to load and their types.
    record_fields = {"train/inputs": "int", "train/targets": "int"}
    train_source = DataSource(FLAGS.train_file, record_fields)
    test_source = DataSource(FLAGS.test_file, record_fields)

    # The field_map dict maps the TFRecord's variable names onto the names the
    # estimator expects — trivial here, but valuable in complex scenarios.
    field_map = {"inputs": "train/inputs", "targets": "train/targets"}

    # Build the input_fns. The epochs argument is None for training (run
    # unbounded) and 1 for testing (a single pass).
    train_input_fn = train_source.get_input_fn("train_in", field_map, None,
                                               FLAGS.batch_size)
    test_input_fn = test_source.get_input_fn("test_in", field_map, 1,
                                             FLAGS.batch_size)

    print("Processing model..")
    # Phase 4: pipe the input_fns through the model for training and evaluation.
    model.train(train_input_fn, steps=FLAGS.train_batches)
    model.evaluate(test_input_fn)

    if FLAGS.interactive:
        print("Interactive decoding...")
        # Phase 5 (optional): engage with the trained system interactively.
        vocab = Vocabulary(fname=params["vocab_file"])
        decoding.cmd_decode(model, vocab)
# ===== Example #5 =====
def main(_):
    '''
    A more advanced Icecaps script demonstrating component chaining and
    multi-task learning (start with train_simple_example.py if you are new).
    It builds a personalized conversation system that combines paired and
    unpaired data and applies MMI reranking during decoding.
    '''

    print("Loading parameters..")
    # With multiple estimators, scoping in the params file controls which
    # hyperparameters go to which estimator (see
    # dummy_params/persona_mmi_example.params). Unscoped hyperparameters are
    # shared by every model in the architecture.
    params = util.load_params(FLAGS.params_file)

    print("Building model..")
    model_dir = FLAGS.model_dir
    if FLAGS.clean_model_dir:
        util.clean_model_dir(model_dir)

    # Three estimators make up this system.
    # (1) A personalized seq2seq estimator that learns the conversational
    # model itself.
    first_model = PersonaSeq2SeqEstimator(model_dir, params, scope="first")

    # (2) A personalized seq2seq estimator sharing its decoder with the first
    # model. It learns an autoencoder over an unpaired personalized data set,
    # injecting stylistic information from that data into the first model.
    # Concretely: build a separate encoder, then use an EstimatorChain to
    # attach it to the first model's decoder so the decoder is shared.
    second_model_encoder = Seq2SeqEncoderEstimator(model_dir,
                                                   params,
                                                   scope="second_encoder")
    second_model = EstimatorChain([second_model_encoder, first_model.decoder],
                                  model_dir,
                                  params,
                                  scope="second")

    # (3) An MMI estimator that learns the inverse function of the first
    # model. During beam-search decoding it reranks the first model's
    # hypotheses; on real-world data this penalizes generic responses such as
    # "I don't know." (the effect is small on these toy data sets).
    mmi_model = PersonaSeq2SeqEstimator(model_dir,
                                        params,
                                        scope="mmi",
                                        is_mmi_model=True)
    model_group = EstimatorGroup([first_model, second_model, mmi_model],
                                 model_dir,
                                 params,
                                 scope="group")

    print("Getting sources..")
    # Two DataSources feed training; a third feeds testing.
    record_fields = {
        "train/inputs": "int",
        "train/targets": "int",
        "train/speakers": "int"
    }
    paired_source = DataSource(FLAGS.paired_file, record_fields)
    unpaired_source = DataSource(FLAGS.unpaired_file, record_fields)
    test_source = DataSource(FLAGS.test_file, record_fields)

    # Three field maps:
    # - paired: the straightforward mapping (as in train_simple_example.py);
    # - unpaired: train/inputs feeds both inputs and targets, training an
    #   autoencoder;
    # - mmi: inputs and targets swapped, learning the first estimator's
    #   inverse.
    paired_field_map = {
        "inputs": "train/inputs",
        "targets": "train/targets",
        "speaker_ids": "train/speakers"
    }
    unpaired_field_map = {
        "inputs": "train/inputs",
        "targets": "train/inputs",
        "speaker_ids": "train/speakers"
    }
    mmi_field_map = {
        "inputs": "train/targets",
        "targets": "train/inputs",
        "speaker_ids": "train/speakers"
    }

    paired_input_fn = paired_source.get_input_fn("paired_in", paired_field_map,
                                                 None, FLAGS.batch_size)
    unpaired_input_fn = unpaired_source.get_input_fn("unpaired_in",
                                                     unpaired_field_map, None,
                                                     FLAGS.batch_size)
    mmi_input_fn = paired_source.get_input_fn("mmi_in", mmi_field_map, None,
                                              FLAGS.batch_size)
    # Multi-task learning requires grouping the input_fns with
    # group_input_fns().
    train_input_fn = DataSource.group_input_fns(
        ["first", "second", "mmi"],
        [paired_input_fn, unpaired_input_fn, mmi_input_fn])
    test_input_fn = test_source.get_input_fn("test_in", paired_field_map, 1,
                                             FLAGS.batch_size)

    print("Processing models..")
    # Icecaps supports flexible multi-task pipelines: each phase can weight
    # the objectives differently. Here we pre-train the first model alone,
    # then jointly train the first and second models, then train the MMI
    # model alone.
    print("Pretraining primary model..")
    model_group.train(train_input_fn,
                      first_model,
                      steps=FLAGS.pretrain_batches)
    print("Multitask training..")
    model_group.train(train_input_fn, {
        "first": 1,
        "second": 1,
        "mmi": 0
    },
                      steps=FLAGS.train_batches)
    print("Training MMI model..")
    model_group.train(train_input_fn, mmi_model, steps=FLAGS.mmi_batches)
    print("Evaluating..")
    model_group.evaluate(test_input_fn, first_model)

    if FLAGS.interactive:
        print("Interactive decoding...")
        vocab = Vocabulary(fname=params["vocab_file"])
        # Pass the MMI model to cmd_decode() to decode with MMI;
        # lambda_balance weights the first model's and MMI model's scores
        # during decoding.
        decoding.cmd_decode(first_model,
                            vocab,
                            persona=True,
                            mmi_component=mmi_model,
                            lambda_balance=FLAGS.lambda_balance)