# Training batches: args.n_batch examples per batch, reshuffled.
# drop_last=False presumably keeps the final partial batch -- confirm
# against data_generator.
train_gen = data_generator(train_data,
                                   args.n_batch,
                                   shuffle=True,
                                   drop_last=False)
        # Dev batches: kept in their original order, and n_gpu times larger than
        # the training batches (args.n_batch * n_gpu).
        dev_gen = data_generator(dev_data,
                                 args.n_batch * n_gpu,
                                 shuffle=False,
                                 drop_last=False)

        # Session configuration: expose only the GPU whose id equals this
        # process's MPI rank, let TensorFlow fall back to another device when an
        # op cannot be placed where requested, and grow GPU memory on demand
        # instead of reserving it all up front.
        config = tf.ConfigProto()
        config.gpu_options.visible_device_list = str(mpi_rank)
        config.allow_soft_placement = True
        config.gpu_options.allow_growth = True

        # Project helpers; presumably they list the graph variables and map the
        # checkpoint under args.init_restore_dir onto them -- verify in utils.
        utils.show_all_variables(rank=mpi_rank)
        utils.init_from_checkpoint(args.init_restore_dir, rank=mpi_rank)
        # Record type with one unique id plus its start and end logits.
        RawResult = collections.namedtuple(
            "RawResult", ["unique_id", "start_logits", "end_logits"])

        # Only rank 0 gets a Saver; it covers the trainable variables only and
        # keeps a single checkpoint (max_to_keep=1). Every other rank holds None.
        if mpi_rank == 0:
            saver = tf.train.Saver(var_list=tf.trainable_variables(),
                                   max_to_keep=1)
        else:
            saver = None

        with tf.train.MonitoredTrainingSession(checkpoint_dir=None,
                                               hooks=training_hooks,
                                               config=config) as sess:
            old_global_steps = sess.run(optimization.global_step)
            for i in range(args.train_epochs):
                print_rank0('Starting epoch %d' % (i + 1))
                           shape=[None, max_seq_length],
                           name='input_ids')
# Graph inputs shaped [None, max_seq_length]; the leading dimension is left
# unspecified. segment_ids is int32, input_mask is fed as float32.
segment_ids = tf.placeholder(
    dtype=tf.int32, shape=[None, max_seq_length], name='segment_ids')
input_mask = tf.placeholder(
    dtype=tf.float32, shape=[None, max_seq_length], name='input_mask')

# Build the MRC model with is_training=False and float16 disabled, wired to
# the input placeholders.
eval_model = BertModelMRC(
    config=bert_config,
    is_training=False,
    use_float16=False,
    input_ids=input_ids,
    token_type_ids=segment_ids,
    input_mask=input_mask,
)

# Map the weights stored in 'model.ckpt' onto the graph built above.
# NOTE(review): presumably this rewires the variable initializers so that the
# global_variables_initializer run below loads the checkpoint values --
# verify against utils.init_from_checkpoint.
utils.init_from_checkpoint('model.ckpt')

# Allow fallback device placement and grow GPU memory on demand.
config = tf.ConfigProto()
config.allow_soft_placement = True
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())

    graph_def = sess.graph.as_graph_def()
    output_nodes = ['start_logits', 'end_logits']
    print('outputs:', output_nodes)

    # Freeze the graph BEFORE opening the output file: opening 'model.pb' in
    # 'wb' mode truncates it, so a failure during conversion would otherwise
    # leave an empty file behind and clobber any previous export.
    output_graph_def = graph_util.convert_variables_to_constants(
        sess, graph_def, output_nodes)

    # tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile alias.
    with tf.gfile.GFile('model.pb', 'wb') as f:
        f.write(output_graph_def.SerializeToString())