Example #1
def model_fn(model_configs,
             mode,
             dataset,
             name=None,
             reuse=None,
             distributed_mode=False,
             is_chief=True,
             verbose=True):
    """ Creates NMT model for training, evaluation or inference.

    Args:
        model_configs: A dictionary of all configurations.
        mode: A mode, one of `ModeKeys.TRAIN`, `ModeKeys.EVAL` or `ModeKeys.INFER`.
        dataset: A `Dataset` object.
        name: A string, the name of the top-level variable scope.
        reuse: Whether to reuse all variables, the parameter passed
          to `tf.variable_scope()`.
        distributed_mode: Whether training runs in distributed mode.
        is_chief: Whether this is the chief worker.
        verbose: Whether to print model parameters.

    Returns: An `EstimatorSpec` object.
    """
    # Create model template function
    model_str = model_configs["model"]
    if model_str is None:
        model_str = "SequenceToSequence"
    # model_name = name or model_str.split(".")[-1]
    model_name = get_model_top_scope_name(model_str, name)
    if verbose:
        tf.logging.info("Create model: {} for {}".format(model_str, mode))
    # create model instance
    model = eval(model_str)(params=model_configs["model_params"],
                            mode=mode,
                            vocab_source=dataset.vocab_source,
                            vocab_target=dataset.vocab_target,
                            name=model_name,
                            verbose=verbose)
    # create expert_utils.Parallelism
    parallelism = Parallelism(mode, reuse=reuse)

    if mode == ModeKeys.TRAIN:
        opt = OptimizerWrapper(model_configs["optimizer_params"])

    def _build_model():
        if verbose:
            tf.logging.info("Building Model.......")
        _input_fields = eval(model_str).create_input_fields(mode)
        _model_output = model.build(_input_fields)
        if verbose:
            tf.logging.info("Finish Building Model.......")
        if mode == ModeKeys.INFER:
            # model_output is prediction
            return _input_fields, _model_output
        elif mode == ModeKeys.EVAL:
            # model_output = (loss_sum, weight_sum), attention
            return _input_fields, _model_output[0], _model_output[1]
        else:  # mode == TRAIN
            # model_output = loss_sum, weight_sum
            _loss = _model_output[0] / _model_output[1]
            grads = opt.optimizer.compute_gradients(
                _loss, colocate_gradients_with_ops=True)
            return _input_fields, _model_output[0], _model_output[1], \
                   _loss, grads

    model_returns = parallelism(_build_model)
    input_fields = model_returns[0]
    if mode == ModeKeys.INFER:
        predictions = model_returns[1]
        return EstimatorSpec(mode,
                             input_fields=input_fields,
                             predictions=predictions)

    if mode == ModeKeys.EVAL:
        loss_op, attention = model_returns[1:]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            loss=loss_op,  # a list of tuples [(loss_sum0, weight_sum0), (loss_sum1, weight_sum1), ...]
            # attentions for force decoding
            predictions=attention)

    assert mode == ModeKeys.TRAIN
    loss_sums = model_returns[1]
    weight_sums = model_returns[2]
    loss_per_gpu = model_returns[3]
    grads = model_returns[4]
    loss = tf.reduce_sum(loss_sums) / tf.reduce_sum(weight_sums)
    tf.add_to_collection(Constants.DISPLAY_KEY_COLLECTION_NAME,
                         Constants.TRAIN_LOSS_KEY_NAME)
    tf.add_to_collection(Constants.DISPLAY_VALUE_COLLECTION_NAME, loss)
    _add_to_display_collection(input_fields)
    # build train op
    train_op = opt.optimize(loss_per_gpu, gradients=grads)
    # build training hooks
    hooks = build_hooks(model_configs,
                        distributed_mode=distributed_mode,
                        is_chief=is_chief)
    from njunmt.training.text_metrics_spec import build_eval_metrics
    hooks.extend(
        build_eval_metrics(model_configs,
                           dataset,
                           is_cheif=is_chief,
                           model_name=model_name))
    return EstimatorSpec(mode,
                         input_fields=input_fields,
                         loss=loss,
                         train_op=train_op,
                         training_hooks=hooks,
                         training_chief_hooks=None)
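
A minimal sketch of how the `EstimatorSpec` returned by this variant might be driven. Here `estimator_spec` (from `model_fn(..., mode=ModeKeys.TRAIN, ...)`) and `train_data` (an iterator of `{"feed_dict": ...}` items, e.g. built by `ParallelTextInputter.make_feeding_data()` as in the `run()` snippet further down) are assumed to exist, so this is illustrative rather than njunmt's own training loop.

import tensorflow as tf

# Hypothetical driver loop; `estimator_spec` and `train_data` are assumptions.
with tf.train.MonitoredSession(hooks=estimator_spec.training_hooks) as sess:
    while not sess.should_stop():
        try:
            data = train_data.next()  # yields {"feed_dict": ...}
        except StopIteration:
            break  # training data exhausted
        sess.run(estimator_spec.train_op, feed_dict=data["feed_dict"])
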
Example #2

def model_fn(
        model_configs,
        mode,
        dataset,
        name=None,
        reuse=None,
        distributed_mode=False,
        is_chief=True,
        verbose=True):
    """ Creates NMT model for training, evaluation or inference.

    Args:
        model_configs: A dictionary of all configurations.
        mode: A mode, one of `ModeKeys.TRAIN`, `ModeKeys.EVAL` or `ModeKeys.INFER`.
        dataset: A `Dataset` object.
        name: A string, the name of the top-level variable scope.
        reuse: Whether to reuse all variables, the parameter passed
          to `tf.variable_scope()`.
        distributed_mode: Whether training runs in distributed mode.
        is_chief: Whether this is the chief worker.
        verbose: Whether to print model parameters.

    Returns: An `EstimatorSpec` object.
    """
    # Create model template function
    model_str = model_configs["model"]
    if model_str is None:
        model_str = "SequenceToSequence"
    # model_name = name or model_str.split(".")[-1]
    model_name = get_model_top_scope_name(model_str, name)
    if verbose:
        tf.logging.info("Create model: {} for {}".format(
            model_str, mode))
    model = eval(model_str)(
        params=model_configs["model_params"],
        mode=mode,
        vocab_source=dataset.vocab_source,
        vocab_target=dataset.vocab_target,
        name=model_name,
        verbose=verbose)
    input_fields = eval(model_str).create_input_fields(mode)
    with tf.variable_scope("", reuse=reuse):
        model_output = model.build(input_fields=input_fields)
    # training mode
    if mode == ModeKeys.TRAIN:
        loss = model_output
        # Register the training loss in a collection so that hooks can easily fetch it
        tf.add_to_collection(Constants.DISPLAY_KEY_COLLECTION_NAME, Constants.TRAIN_LOSS_KEY_NAME)
        tf.add_to_collection(Constants.DISPLAY_VALUE_COLLECTION_NAME, loss)
        _add_to_display_collection(input_fields)
        # build train op
        train_op = optimize(loss, model_configs["optimizer_params"])
        # build training hooks
        hooks = build_hooks(model_configs, distributed_mode=distributed_mode, is_chief=is_chief)
        from njunmt.training.text_metrics_spec import build_eval_metrics
        hooks.extend(build_eval_metrics(model_configs, dataset,
                                        is_cheif=is_chief, model_name=model_name))

        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            loss=loss,
            train_op=train_op,
            training_hooks=hooks,
            training_chief_hooks=None)

    # evaluation mode
    if mode == ModeKeys.EVAL:
        loss = model_output[0]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            loss=loss,
            # attentions for force decoding
            predictions=model_output[1])

    assert mode == ModeKeys.INFER
    return EstimatorSpec(
        mode,
        input_fields=input_fields,
        predictions=model_output)
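
Because `reuse` is forwarded to `tf.variable_scope()`, the same `model_fn` can be called twice with the same top-level `name` to obtain a training graph and an evaluation graph that share parameters. A hedged sketch, assuming `model_configs` and `dataset` are already defined in scope:

# Build the training graph first, then an evaluation graph over the same variables.
train_spec = model_fn(model_configs, ModeKeys.TRAIN, dataset, name="nmt")
eval_spec = model_fn(model_configs, ModeKeys.EVAL, dataset, name="nmt", reuse=True)
# Each spec exposes its own `input_fields` placeholders; only the model parameters are shared.
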
Example #3

    def run(self):
        """ Trains the model. """
        # vocabulary
        vocab_source = Vocab(
            filename=self._model_configs["data"]["source_words_vocabulary"],
            bpe_codes=self._model_configs["data"]["source_bpecodes"],
            reverse_seq=self._model_configs["train"]["features_r2l"])
        vocab_target = Vocab(
            filename=self._model_configs["data"]["target_words_vocabulary"],
            bpe_codes=self._model_configs["data"]["target_bpecodes"],
            reverse_seq=self._model_configs["train"]["labels_r2l"])
        eval_dataset = {
            "vocab_source": vocab_source,
            "vocab_target": vocab_target,
            "features_file": self._model_configs["data"]["eval_features_file"],
            "labels_file": self._model_configs["data"]["eval_labels_file"]
        }

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        estimator_spec = model_fn(model_configs=self._model_configs,
                                  mode=ModeKeys.TRAIN,
                                  vocab_source=vocab_source,
                                  vocab_target=vocab_target,
                                  name=self._model_configs["problem_name"])
        train_ops = estimator_spec.train_ops
        hooks = estimator_spec.training_hooks

        # build training session
        sess = tf.train.MonitoredSession(
            session_creator=tf.train.ChiefSessionCreator(
                scaffold=tf.train.Scaffold(),
                checkpoint_dir=None,
                master="",
                config=config),
            hooks=tuple(hooks) + tuple(
                build_eval_metrics(self._model_configs,
                                   eval_dataset,
                                   model_name=estimator_spec.name)))

        train_text_inputter = ParallelTextInputter(
            LineReader(
                data=self._model_configs["data"]["train_features_file"],
                maximum_length=self._model_configs["train"]["maximum_features_length"],
                preprocessing_fn=lambda x: vocab_source.convert_to_idlist(x)),
            LineReader(
                data=self._model_configs["data"]["train_labels_file"],
                maximum_length=self._model_configs["train"]["maximum_labels_length"],
                preprocessing_fn=lambda x: vocab_target.convert_to_idlist(x)),
            vocab_source.pad_id,
            vocab_target.pad_id,
            batch_size=self._model_configs["train"]["batch_size"],
            batch_tokens_size=self._model_configs["train"]["batch_tokens_size"],
            shuffle_every_epoch=self._model_configs["train"]["shuffle_every_epoch"],
            fill_full_batch=True,
            bucketing=True)
        train_data = train_text_inputter.make_feeding_data(
            input_fields=estimator_spec.input_fields)

        eidx = [0, 0]
        update_cycle = [self._model_configs["train"]["update_cycle"], 1]

        def step_fn(step_context):
            step_context.session.run(train_ops["zeros_op"])
            try:
                while update_cycle[0] != update_cycle[1]:
                    data = train_data.next()
                    step_context.session.run(train_ops["collect_op"],
                                             feed_dict=data["feed_dict"])
                    update_cycle[1] += 1
                data = train_data.next()
                update_cycle[1] = 1
                return step_context.run_with_hooks(train_ops["train_op"],
                                                   feed_dict=data["feed_dict"])
            except StopIteration:
                eidx[1] += 1

        while not sess.should_stop():
            if eidx[0] != eidx[1]:
                tf.logging.info("STARTUP Epoch {}".format(eidx[1]))
                eidx[0] = eidx[1]
            sess.run_step_fn(step_fn)
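
The `zeros_op` / `collect_op` / `train_op` triple consumed by `step_fn` suggests a gradient-accumulation scheme: reset accumulators, add per-batch gradients, then apply one update per cycle. Below is a self-contained illustration of that pattern in plain TensorFlow 1.x; it is not njunmt code, and averaging by `update_cycle` (as well as how njunmt splits the work between `collect_op` and `train_op`) is an assumption.

import tensorflow as tf

update_cycle = 4
x = tf.placeholder(tf.float32, [None, 4])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.get_variable("w", [4, 1])
loss = tf.reduce_mean(tf.squared_difference(tf.matmul(x, w), y))

opt = tf.train.AdamOptimizer(1e-4)
grads_and_vars = opt.compute_gradients(loss)
# One non-trainable accumulator per trainable variable.
accums = [tf.Variable(tf.zeros(v.shape), trainable=False)
          for _, v in grads_and_vars]
# Reset accumulators at the start of each cycle.
zeros_op = tf.group(*[a.assign(tf.zeros_like(a)) for a in accums])
# Add the current batch's gradients into the accumulators.
collect_op = tf.group(*[a.assign_add(g)
                        for a, (g, _) in zip(accums, grads_and_vars)])
# Apply the averaged accumulated gradients once per cycle.
train_op = opt.apply_gradients(
    [(a.read_value() / update_cycle, v)
     for a, (_, v) in zip(accums, grads_and_vars)])
# Usage per cycle: run zeros_op once, collect_op once per batch, then train_op.
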
Example #4
def model_fn(
        model_configs,
        mode,
        dataset,
        name=None,
        reuse=None,
        distributed_mode=False,
        is_chief=True,
        verbose=True):
    """ Creates NMT model for training, evaluation or inference.

    Args:
        model_configs: A dictionary of all configurations.
        mode: A mode, one of `ModeKeys.TRAIN`, `ModeKeys.EVAL` or `ModeKeys.INFER`.
        dataset: A `Dataset` object.
        name: A string, the name of the top-level variable scope.
        reuse: Whether to reuse all variables, the parameter passed
          to `tf.variable_scope()`.
        distributed_mode: Whether training runs in distributed mode.
        is_chief: Whether this is the chief worker.
        verbose: Whether to print model parameters.

    Returns: An `EstimatorSpec` object.
    """
    # Create model template function
    model_str = model_configs["model"]
    if model_str is None:
        model_str = "SequenceToSequence"
    # model_name = name or model_str.split(".")[-1]
    model_name = get_model_top_scope_name(model_str, name)
    if verbose:
        tf.logging.info("Create model: {} for {}".format(
            model_str, mode))
    # create model instance
    model = eval(model_str)(
        params=model_configs["model_params"],
        mode=mode,
        vocab_source=dataset.vocab_source,
        vocab_target=dataset.vocab_target,
        name=model_name,
        verbose=verbose)
    # create expert_utils.Parallelism
    parallelism = Parallelism(mode, reuse=reuse)

    if mode == ModeKeys.TRAIN:
        opt = OptimizerWrapper(model_configs["optimizer_params"])

    def _build_model():
        if verbose:
            tf.logging.info("Building Model.......")
        _input_fields = eval(model_str).create_input_fields(mode)
        _model_output = model.build(_input_fields)
        if verbose:
            tf.logging.info("Finish Building Model.......")
        if mode == ModeKeys.INFER:
            # model_output is prediction
            return _input_fields, _model_output
        elif mode == ModeKeys.EVAL:
            # model_output = (loss_sum, weight_sum), attention
            return _input_fields, _model_output[0], _model_output[1]
        else:  # mode == TRAIN
            # model_output = loss_sum, weight_sum
            _loss = _model_output[0] / _model_output[1]
            grads = opt.optimizer.compute_gradients(
                _loss,
                var_list=tf.trainable_variables(),
                colocate_gradients_with_ops=True)
            return _input_fields, _loss, grads

    model_returns = parallelism(_build_model)
    input_fields = model_returns[0]
    if mode == ModeKeys.INFER:
        predictions = model_returns[1]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            predictions=predictions)

    if mode == ModeKeys.EVAL:
        loss_op, attention = model_returns[1:]
        return EstimatorSpec(
            mode,
            input_fields=input_fields,
            loss=loss_op,  # a list of tuples [(loss_sum0, weight_sum0), (loss_sum1, weight_sum1), ...]
            # attentions for force decoding
            predictions=attention)

    assert mode == ModeKeys.TRAIN
    loss_per_dp, grads = model_returns[1:]
    _add_to_display_collection(input_fields)
    # build train op
    train_loss, train_ops = opt.optimize(loss_per_dp, grads, update_cycle=model_configs["train"]["update_cycle"])
    tf.add_to_collection(Constants.DISPLAY_KEY_COLLECTION_NAME, Constants.TRAIN_LOSS_KEY_NAME)
    tf.add_to_collection(Constants.DISPLAY_VALUE_COLLECTION_NAME, train_loss)
    # build training hooks
    hooks = build_hooks(model_configs, distributed_mode=distributed_mode, is_chief=is_chief)
    from njunmt.training.text_metrics_spec import build_eval_metrics
    hooks.extend(build_eval_metrics(model_configs, dataset,
                                    is_cheif=is_chief, model_name=model_name))
    return EstimatorSpec(
        mode,
        input_fields=input_fields,
        loss=train_loss,
        train_ops=train_ops,
        training_hooks=hooks,
        training_chief_hooks=None)
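
In EVAL mode this variant returns `loss` as a list of `(loss_sum, weight_sum)` pairs, one per data-parallel build. A hedged sketch of reducing those pairs to a corpus-level loss, mirroring the `tf.reduce_sum(loss_sums) / tf.reduce_sum(weight_sums)` reduction in the TRAIN branch of Example #1; `eval_spec`, `sess`, and `eval_data` are assumptions not shown in the example.

# Hypothetical evaluation loop; eval_spec = model_fn(..., mode=ModeKeys.EVAL, ...).
total_loss, total_weight = 0.0, 0.0
for data in eval_data:                     # items shaped like {"feed_dict": ...}
    pairs = sess.run(eval_spec.loss, feed_dict=data["feed_dict"])
    for loss_sum, weight_sum in pairs:     # one pair per device
        total_loss += loss_sum
        total_weight += weight_sum
corpus_loss = total_loss / total_weight    # token-weighted average loss
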