Example #1
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    output = model(inputs, targets)

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=output)

    logits = output

    # Calculate model loss.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params.label_smoothing, params.vocab_size)
    loss = tf.reduce_sum(xentropy * weights) / tf.reduce_sum(weights)

    if mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op = get_train_op(loss, params)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
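A minimal usage sketch for wiring a model_fn like this into an Estimator. The names train_input_fn, the model directory, and the params object are hypothetical stand-ins, not part of the example above.

import tensorflow as tf

# Hypothetical wiring: params must expose the attributes model_fn reads
# (label_smoothing, vocab_size, ...), and train_input_fn must yield
# (features, labels) batches of token ids.
estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir="/tmp/transformer", params=params)
estimator.train(input_fn=train_input_fn, max_steps=10000)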
Example #2
def get_model(vocab_size_source, vocab_size_target):
    """获取模型"""
    transformer = _transformer.Transformer(_config.num_layers, _config.d_model, _config.num_heads, _config.dff,
                                           vocab_size_source + 1, vocab_size_target + 1,
                                           pe_input=vocab_size_source + 1,
                                           pe_target=vocab_size_target + 1,
                                           rate=_config.dropout_rate)
    return transformer
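Hypothetical usage of the helper above; the vocabulary sizes would come from the fitted tokenizers, and _config must define num_layers, d_model, num_heads, dff, and dropout_rate.

model = get_model(vocab_size_source=8192, vocab_size_target=8192)

Note that this example sizes the positional encodings (pe_input, pe_target) with the vocabulary sizes rather than a separate maximum sequence length, and reserves one extra token id via the + 1 offsets.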
Example #3
def initialize_network(self):
    de2idx, idx2de = load_de_vocab(self.min_cnt)
    en2idx, idx2en = load_en_vocab(self.min_cnt)
    source_word_count = len(en2idx)
    target_word_count = len(de2idx)
    self.model = transformer.Transformer(self.batch_size,
                                         source_word_count,
                                         target_word_count, self.max_len)
Example #4
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        logits = model(inputs, targets)

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=logits,
                export_outputs={
                    "translate": tf.estimator.export.PredictOutput(logits)
                })

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However,
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits is
        # not a problem when computing xentropy, because padded_cross_entropy_loss
        # resolves the shape on the TPU.
        logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params["label_smoothing"], params["vocab_size"])
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(loss, "cross_entropy")

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))
        else:
            train_op, metric_dict = get_train_op_and_metrics(loss, params)

            # Epochs can be quite long. This gives some intermediate information
            # in TensorBoard.
            metric_dict["minibatch_loss"] = loss
            record_scalars(metric_dict)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
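A toy illustration of the set_shape trick described in the comment above, with hypothetical static shapes standing in for the TPU-known target dimensions.

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Merge the statically known target dimensions with the trailing vocab
# dimension of the logits.
logits = tf.placeholder(tf.float32, [None, None, 32768])
targets = tf.placeholder(tf.int64, [16, 64])
logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])
print(logits.shape)  # (16, 64, 32768)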
Example #5
def create_model(s_ids, t_ids, mode, config):
    eos_id = config.eos_id
    with tf.variable_scope('model'):
        model = transformer.Transformer(config, mode == tf.estimator.ModeKeys.TRAIN)
        logits = model(s_ids, t_ids, eos_id)
        with tf.variable_scope("loss"):
            xentropy, weights = metrics.padded_cross_entropy_loss(
                logits, t_ids, config.label_smoothing, config.vocab_size)
            # Compute the weighted mean of the cross-entropy losses.
            loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)
    return loss
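A standalone sketch of the weighting idea, assuming padded_cross_entropy_loss assigns weight 0 to padding positions (id 0) and 1 elsewhere, so the mean is taken over real tokens only:

import tensorflow as tf

targets = tf.constant([[5, 9, 0, 0]])           # two real tokens, two <PAD>
xentropy = tf.constant([[1.0, 3.0, 7.0, 7.0]])  # per-position losses
weights = tf.cast(tf.not_equal(targets, 0), tf.float32)
loss = tf.reduce_sum(xentropy * weights) / tf.reduce_sum(weights)
# loss == (1.0 + 3.0) / 2 == 2.0; the padded positions are ignored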
Example #6
def release_model(**kwargs):
    release_dir = kwargs.get("release_dir", './release')
    restore_dir = kwargs.get('restore_dir', './out')
    if not os.path.isdir(release_dir):
        print("Create release dir:{}".format(release_dir))
        os.mkdir(release_dir)
    for file in glob.glob(os.path.join(release_dir, '*')):
        print("Remove previous file:{}".format(file))
        os.remove(file)
    # Model checkpoint and variable file written by the release.
    release_model_file = os.path.join(release_dir, 'model.ckpt')
    release_var_file = os.path.join(release_dir, 'var.pkl')
    # Files to restore from.
    restore_step = kwargs.get('steps')
    if restore_step:
        restore_model_file = os.path.join(restore_dir, 'model.ckpt-{}'.format(restore_step))
    else:
        restore_model_file = tf.train.get_checkpoint_state(restore_dir).model_checkpoint_path
    restore_var_file = os.path.join(restore_dir, 'options.pkl')
    with open(restore_var_file, 'rb') as f:
        options = pickle.load(f)
        basic_config = config.basic_config()
        basic_config.__dict__.update(options)
        basic_config.beam_size = 2
    g = tf.Graph()
    with g.as_default():
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            input_ids = tf.placeholder(tf.int64, [None, None], name='input_ids')
            with tf.variable_scope('model'):
                model = transformer.Transformer(basic_config, False)
                out_res = model(input_ids, eos_id=basic_config.eos_id)
            top_decoded_ids = out_res['outputs']
            scores = out_res['scores']
            # print(top_decoded_ids.name)
            # print(scores.name)
            saver = tf.train.Saver()
            saver.restore(sess, restore_model_file)
            saver.save(sess, release_model_file)
            _vars = {'input_ids': input_ids.name, 'decode_ids': top_decoded_ids.name, 'scores': scores.name}
            with open(release_var_file, 'wb') as f:
                pickle.dump((_vars, options), f, -1)
            # res=sess.run(top_decoded_ids,{input_ids:np.array([[2,3,4,5]],dtype=np.int32)})
            # print(res)
            # print(res[0].shape)
            # print(res[1]['k'].shape)
            # print(res[1]['w'].shape)
            print("Done!")
Example #7
def main(unused_argv):
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    if FLAGS.params == "base":
        params = model_params.TransformerBaseParams
    elif FLAGS.params == "big":
        params = model_params.TransformerBigParams
    else:
        raise ValueError("Invalid parameter set defined: %s."
                         "Expected 'base' or 'big.'" % FLAGS.params)
    # Set up estimator and params
    params.beam_size = _BEAM_SIZE
    params.alpha = _ALPHA
    params.extra_decode_length = _EXTRA_DECODE_LENGTH
    params.frozen_graph = None
    input_shape = [None, None]
    input_tokens = tf.compat.v1.placeholder(tf.int64,
                                            input_shape,
                                            name='input_tokens')
    with tf.compat.v1.variable_scope("model"):
        model = transformer.Transformer(params, False)
        output = model(input_tokens)

    # Restore variables from checkpoint
    sess = tf.compat.v1.Session()
    latest_model = tf.train.latest_checkpoint(FLAGS.model_dir)
    saver = tf.compat.v1.train.Saver()
    saver.restore(sess, latest_model)

    # Freeze the graph
    graph_def = sess.graph.as_graph_def()
    output_names = [
        'model/Transformer/strided_slice_15',
        'model/Transformer/strided_slice_16'
    ]
    graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
        sess, graph_def, output_names)
    print("pb_path is", FLAGS.pb_path)
    with tf.compat.v1.gfile.GFile(FLAGS.pb_path, 'wb') as pb_file:
        pb_file.write(graph_def.SerializeToString())
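A hedged inference sketch for the frozen graph written above. The output tensor name comes from output_names in main() and the input name from the input_tokens placeholder; the .pb path and token ids are hypothetical.

import tensorflow as tf

with tf.io.gfile.GFile('transformer.pb', 'rb') as f:  # hypothetical path
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())

g = tf.Graph()
with g.as_default():
    tf.import_graph_def(graph_def, name='')

with tf.compat.v1.Session(graph=g) as sess:
    tokens = g.get_tensor_by_name('input_tokens:0')
    outputs = g.get_tensor_by_name('model/Transformer/strided_slice_15:0')
    print(sess.run(outputs, {tokens: [[2, 15, 9, 1]]}))  # hypothetical ids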
Example #8
def train_schedule(train_eval_iterations, single_iteration_train_steps, params,
                   bleu_source=None, bleu_ref=None, bleu_threshold=None):
    """
    Train and evaluate the model.
    :param train_eval_iterations: Number of times to repeat the train-eval iteration.
    :param single_iteration_train_steps: Number of steps to train in one iteration.
    :param params: Model hyperparameters.
    :param bleu_source: File containing text to be translated for BLEU calculation.
    :param bleu_ref: File containing reference translations for BLEU calculation.
    :param bleu_threshold: Minimum BLEU score before training is stopped.
    """
    print('Training schedule:')
    print('\t1. Train for %d iterations.' % train_eval_iterations)
    print('\t2. Each iteration runs %d steps.' % single_iteration_train_steps)
    print('\t3. Compute BLEU score.')
    # if bleu_threshold is not None:
    #     print("Repeat above steps until the BLEU score reaches", bleu_threshold)
    #     train_eval_iterations = INF
    # else:
    #     print("Repeat above steps %d times." % train_eval_iterations)

    # Loop training/evaluation/bleu cycles
    subtokenizer = tokenizer.Subtokenizer(vocab_file='vocab.ende.32768')
    dataset_train = dataset.TranslationDataset(dir_lang1='wmt32k-train.lang1',
                                               dir_lang2='wmt32k-train.lang2',
                                               subtokenizer=subtokenizer)
    global_step = 0
    best_bleu_score = 0
    net = transformer.Transformer(params=params, train=1)
    net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)
    learning_rate = get_learning_rate(params.learning_rate, params.hidden_size,
                                      params.learning_rate_warmup_steps, global_step)
    optimizer = mx.optimizer.Adam(learning_rate=learning_rate, beta1=params.optimizer_adam_beta1,
                                  beta2=params.optimizer_adam_beta2, epsilon=params.optimizer_adam_epsilon)

    trainer = gluon.Trainer(net.collect_params(), optimizer=optimizer)
    bleu_score_file = open('bleu_score_file', 'w+')
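For reference, a minimal sketch of the warmup schedule that get_learning_rate above is assumed to implement (the schedule from "Attention Is All You Need"); the repository's actual helper may differ in details.

def noam_learning_rate(base_lr, hidden_size, warmup_steps, step):
    # Rise linearly during warmup, then decay with the inverse square
    # root of the step, scaled by hidden_size ** -0.5.
    step = max(float(step), 1.0)
    return base_lr * hidden_size ** -0.5 * min(step ** -0.5,
                                               step * warmup_steps ** -1.5)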
Example #9
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    if params.frozen_graph and mode == tf.estimator.ModeKeys.PREDICT:
        print("Reading***** From *** pb", flush=True)
        input_map = {'input_tokens': features}
        output_names = [
            'model/Transformer/strided_slice_15',
            'model/Transformer/strided_slice_16'
        ]

        with tf.io.gfile.GFile(params.frozen_graph, "rb") as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())
        tf.graph_util.import_graph_def(graph_def,
                                       input_map,
                                       output_names,
                                       name="")
        output_tensors = [
            tf.compat.v1.get_default_graph().get_tensor_by_name(name + ":0")
            for name in output_names
        ]
        output = {'outputs': output_tensors[0], 'scores': output_tensors[1]}
        return tf.estimator.EstimatorSpec(tf.estimator.ModeKeys.PREDICT,
                                          predictions=output)
    else:
        with tf.compat.v1.variable_scope("model"):
            inputs, targets = features, labels

            # Create model and get output logits.
            model = transformer.Transformer(
                params, mode == tf.estimator.ModeKeys.TRAIN)

            output = model(inputs, targets)

            # When in prediction mode, the labels/targets are None, and the
            # model output is the prediction.
            if mode == tf.estimator.ModeKeys.PREDICT:
                return tf.estimator.EstimatorSpec(
                    tf.estimator.ModeKeys.PREDICT, predictions=output)

            logits = output

            # Calculate model loss.
            xentropy, weights = metrics.padded_cross_entropy_loss(
                logits, targets, params.label_smoothing, params.vocab_size)
            loss = tf.reduce_sum(input_tensor=xentropy *
                                 weights) / tf.reduce_sum(input_tensor=weights)

            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    predictions={"predictions": logits},
                    eval_metric_ops=metrics.get_eval_metrics(
                        logits, labels, params))
            else:
                train_op = get_train_op(loss, params)
                logging_hook = tf.compat.v1.train.LoggingTensorHook(
                    {"loss": loss}, every_n_iter=FLAGS.print_iter)
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    training_hooks=[logging_hook])
Example #10
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    # In predict mode, this returns a dictionary {
    #   output: [batch_size, decoded_length]
    #   score: [batch_size, float]}
    # Otherwise it returns a float32 tensor with shape
    # [batch_size, target_length, vocab_size].
    logits = model(inputs, targets)

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=logits,
          export_outputs={
              "translate": tf.estimator.export.PredictOutput(logits)
          })

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However,
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits is
    # not a problem when computing xentropy, because padded_cross_entropy_loss
    # resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    # During training, the loss weights for positions whose label is 0
    # (i.e. <PAD>) are set to 0.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        def metric_fn(logits, labels):
          return metrics.get_eval_metrics(logits, labels, params=params)
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/")
        )
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
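Because the loss is re-exported under the name "cross_entropy" via tf.identity, a logging hook can surface it by tensor name during training. A hedged sketch (estimator and train_input_fn are hypothetical stand-ins):

import tensorflow as tf

logging_hook = tf.train.LoggingTensorHook(
    tensors={"loss": "cross_entropy"}, every_n_iter=100)
estimator.train(input_fn=train_input_fn, hooks=[logging_hook])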