Beispiel #1
0
def main(_):
    """Configure logging and TensorFlow, then run `train_eval`.

    The single positional argument is ignored. `train_eval` receives
    `dqn_agent.DdqnAgent` as the agent class when `FLAGS.use_ddqn` is truthy
    and `dqn_agent.DqnAgent` otherwise.
    """
    logging.set_verbosity(logging.INFO)
    tf.enable_resource_variables()

    # Pick the agent implementation from the command-line flag.
    if FLAGS.use_ddqn:
        agent_class = dqn_agent.DdqnAgent
    else:
        agent_class = dqn_agent.DqnAgent

    train_eval(
        FLAGS.root_dir,
        agent_class=agent_class,
        num_iterations=FLAGS.num_iterations)
Beispiel #2
0
def main(_):
  """Build a `TrainEval` for the Atari game named by the flags and run it.

  The positional argument is ignored. Returns 0 without doing any work when
  `common.has_eager_been_enabled()` reports true; otherwise returns None
  after `TrainEval(...).run()` completes.
  """
  logging.set_verbosity(logging.INFO)

  # Bail out before touching TF state when eager mode is already on.
  if common.has_eager_been_enabled():
    return 0

  tf.enable_resource_variables()
  environment_name = suite_atari.game(name=FLAGS.game_name)
  run_args = get_run_args()
  train_eval_job = TrainEval(FLAGS.root_dir, environment_name, **run_args)
  train_eval_job.run()
Beispiel #3
0
def main(_):
    """Parse CLI options into the global config, then train and test.

    The positional argument is ignored; options are read from `sys.argv`
    through `argparse`. An optional `--config_file` is loaded first, then the
    remaining positional `opts` are applied on top of it.
    """
    arg_parser = argparse.ArgumentParser(
        description='Classification model training')
    arg_parser.add_argument(
        '--config_file',
        type=str,
        default=None,
        help='Optional config file for params')
    arg_parser.add_argument(
        'opts',
        help='see config.py for all options',
        default=None,
        nargs=argparse.REMAINDER)
    cli_args = arg_parser.parse_args()

    # File-based settings are applied before the command-line overrides.
    config_file = cli_args.config_file
    if config_file is not None:
        cfg_from_file(config_file)
    overrides = cli_args.opts
    if overrides is not None:
        cfg_from_list(overrides)

    assert_and_infer_cfg()
    print_cfg()

    # Restrict the process to the configured GPU before sessions are created.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID)
    logger = utils.setup_custom_logger('root')
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    gpu_options = tf.GPUOptions(allow_growth=True)
    tf_config = tf.ConfigProto(
        device_count={'GPU': 1},
        gpu_options=gpu_options)
    tf.enable_resource_variables()

    train(tf_config, logger)
    test(tf_config, logger)
Beispiel #4
0
def tf1_multi_args_model_path() -> Generator[str, None, None]:
    """Yield a temporary directory containing a three-input TF1 SavedModel.

    The graph computes ``matmul(x1 + x2 * factor, ones(5, 1))`` and is saved
    with `simple_save` under the input keys ``x1``, ``x2``, ``factor`` and the
    output key ``prediction``. The directory is removed once the generator is
    resumed after the yield.
    """
    # Graph inputs: two [None, 5] float matrices and one scalar multiplier.
    x1 = tf.placeholder(shape=[None, 5], dtype=tf.float32, name="x1")
    x2 = tf.placeholder(shape=[None, 5], dtype=tf.float32, name="x2")
    factor = tf.placeholder(shape=(), dtype=tf.float32, name="factor")

    # A 5x1 column of ones, so the matmul sums each row of the combined input.
    ones_column = tf.constant([[1.0], [1.0], [1.0], [1.0], [1.0]],
                              dtype=tf.float32)
    combined = x1 + x2 * factor
    prediction = tf.matmul(combined, ones_column)

    with tempfile.TemporaryDirectory() as temp_dir:
        # NOTE(review): `sess` is not defined inside this function; it is
        # presumably a module-level session -- confirm against the full file.
        with sess.as_default():
            tf.enable_resource_variables()
            sess.run(tf.global_variables_initializer())
            signature_inputs = {
                "x1": x1,
                "x2": x2,
                "factor": factor,
            }
            signature_outputs = {"prediction": prediction}

            tf.compat.v1.saved_model.simple_save(
                sess,
                temp_dir,
                inputs=signature_inputs,
                outputs=signature_outputs)

        yield temp_dir
Beispiel #5
0
def main(_):
  """Enable resource variables and run `train_eval` with the flag settings.

  The positional argument is ignored. Returns 0 without training when
  `common.has_eager_been_enabled()` reports true; otherwise returns None
  after `train_eval` finishes.
  """
  if common.has_eager_been_enabled():
    return 0
  # Enable resource variables exactly once, through the `compat.v1` endpoint.
  # The previous second call via `tf.enable_resource_variables()` was
  # redundant and that alias does not exist in the TF2 top-level namespace.
  tf.compat.v1.enable_resource_variables()
  logging.set_verbosity(logging.INFO)
  train_eval(FLAGS.root_dir, num_iterations=FLAGS.num_iterations)
Beispiel #6
0
def main(_):
    """Run `TrainEval` on the environment selected by the flags.

    The positional argument is ignored. `FLAGS.environment_name` is used
    when it is set; when it is None the name is derived from
    `FLAGS.game_name` through `suite_atari.game`.
    """
    logging.set_verbosity(logging.INFO)
    tf.enable_resource_variables()

    explicit_name = FLAGS.environment_name
    environment_name = (
        suite_atari.game(name=FLAGS.game_name)
        if explicit_name is None else explicit_name)

    train_eval_job = TrainEval(
        FLAGS.root_dir, environment_name, **get_run_args())
    train_eval_job.run()
Beispiel #7
0
def main(_):
  """Set up logging and TensorFlow, then call `train_eval`.

  The positional argument is ignored. The iteration count and evaluation
  interval are forwarded from the command-line flags.
  """
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  schedule = dict(
      num_iterations=FLAGS.num_iterations,
      eval_interval=FLAGS.eval_interval)
  train_eval(FLAGS.root_dir, **schedule)
    def _init_parameters(self):
        """Create the weight and bias variables and store them on `self`.

        Three weight matrices (`W_1`..`W_3`) are created with a Xavier
        initializer and six bias vectors (`b1`..`b6`) with
        `self.bias_initializer`. No `W_4`..`W_6` variables are created here.
        """
        tf.enable_resource_variables()

        with tf.name_scope('weights'):
            initializer = tf.contrib.layers.xavier_initializer()
            # (attribute on self, variable name, shape)
            weight_specs = (
                ('W_1', 'weight_1', [self.FLAGS.num_mov, 512]),
                ('W_2', 'weight_2', [512, 512]),
                ('W_3', 'weight_3', [512, 1024]),
            )
            for attr_name, var_name, var_shape in weight_specs:
                setattr(self, attr_name,
                        tf.get_variable(shape=var_shape,
                                        name=var_name,
                                        initializer=initializer))

        with tf.name_scope('biases'):
            # NOTE(review): the last bias width is hard-coded to 17770 while
            # the first weight uses FLAGS.num_mov -- confirm they must match.
            bias_widths = (512, 512, 1024, 512, 512, 17770)
            for index, width in enumerate(bias_widths, start=1):
                setattr(self, 'b%d' % index,
                        tf.get_variable(shape=[width],
                                        name='bias_%d' % index,
                                        initializer=self.bias_initializer))
Beispiel #9
0
def main(results):
    """Export the trained model as a SavedModel and convert it to TFLite.

    The SavedModel is written to `<results>/saved_word` unless that directory
    already exists, and the converted flatbuffer is written to
    `converted_model_word.tflite` in the current working directory.

    Args:
        results: Output directory of a training run. `data.json`,
            `vocab.json`, `model.json` and `model.weights/` are read from it.
    """
    tf.enable_resource_variables()
    # restore config and model
    dir_output = results
    weights_dir = os.path.join(dir_output, 'model.weights/')
    saved_path = os.path.join(dir_output, 'saved_word')

    # Join paths instead of concatenating strings so that `results` works
    # with or without a trailing separator, like `weights_dir` above.
    # `config_data` is not used below; the call is kept in case constructing
    # it validates or loads the file.
    config_data = Config(os.path.join(dir_output, "data.json"))
    config_vocab = Config(os.path.join(dir_output, "vocab.json"))
    config_model = Config(os.path.join(dir_output, "model.json"))
    vocab = Vocab(config_vocab)

    # Export only once; an existing SavedModel directory is reused as-is.
    if not os.path.isdir(saved_path):
        model = Img2SeqModel(config_model, dir_output, vocab)
        model.build_pred()
        model.restore_session(weights_dir)
        model.save_savedmodel(saved_path)

    SAMPLE_DIR = 'tools/data/hand/raw_word'

    def representative_dataset_gen():
        """Yield up to ten resized sample images for calibration.

        Currently unused: the converter settings that would consume it are
        commented out below.
        """
        num_calibration_steps = 10

        if not os.path.isdir(SAMPLE_DIR):
            # Parenthesised form prints the same text on Python 2 and 3.
            print('Failed to read representative_dataset')
            return

        for f in os.listdir(SAMPLE_DIR):
            img_path = os.path.join(SAMPLE_DIR, f)
            img = Image.open(img_path)
            img = img.resize((80, 100), Image.BILINEAR)
            # NOTE(review): show() opens an image viewer for every sample;
            # looks like a debugging leftover -- confirm before enabling.
            img.show()
            img = np.array(img)
            yield [img]

            num_calibration_steps -= 1
            if num_calibration_steps == 0:
                break

    converter = tf.lite.TFLiteConverter.from_saved_model(saved_path)
    converter.target_ops = [
        # tf.lite.OpsSet.TFLITE_BUILTINS,
        tf.lite.OpsSet.SELECT_TF_OPS
    ]

    # Following has "Segmentation fault"
    # converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # converter.representative_dataset = representative_dataset_gen

    tflite_model = converter.convert()
    # Use a context manager so the file is flushed and closed deterministically.
    with open("converted_model_word.tflite", "wb") as tflite_file:
        tflite_file.write(tflite_model)
Beispiel #10
0
def main(_):
    """Train and evaluate a DQN or Double-DQN agent.

    The positional argument is ignored. `FLAGS.use_ddqn` selects
    `dqn_agent.DdqnAgent`; otherwise `dqn_agent.DqnAgent` is used.
    """
    # Log at INFO verbosity.
    logging.set_verbosity(logging.INFO)
    # Enable resource variables before training starts.
    tf.enable_resource_variables()

    if FLAGS.use_ddqn:
        agent_class = dqn_agent.DdqnAgent
    else:
        agent_class = dqn_agent.DqnAgent

    # Hand the selected agent class and the iteration budget to train_eval.
    train_options = dict(
        agent_class=agent_class,
        num_iterations=FLAGS.num_iterations)
    train_eval(FLAGS.root_dir, **train_options)
Beispiel #11
0
def main():
  """Run all unit tests with a selection of TF 2.0 features switched on.

  Intended only for testing TensorFlow code. The switches are applied in a
  fixed order before control is handed to `tf.test.main()`.
  """
  feature_switches = (
      tf2.enable,  # Switches TensorArrayV2 and control flow V2
      tf.enable_v2_tensorshape,
      tf.enable_resource_variables,  # Required since we use defuns.
  )
  for switch_on in feature_switches:
    switch_on()
  tf.test.main()
Beispiel #12
0
def main(unused_argv):
    """Train the bandit agent chosen by `FLAGS.agent` on the wheel environment.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If `FLAGS.agent` is not one of 'LinUCB', 'LinTS' or
            'epsGreedy'.
    """
    tf.enable_resource_variables()

    with tf.device('/CPU:0'):  # due to b/128333994
        env = wheel_py_environment.WheelPyEnvironment(DELTA, MU_BASE, STD_BASE,
                                                      MU_HIGH, STD_HIGH,
                                                      BATCH_SIZE)
        environment = tf_py_environment.TFPyEnvironment(env)

        # Reference functions used by the regret / suboptimal-arm metrics.
        optimal_reward_fn = functools.partial(
            environment_utilities.tf_wheel_bandit_compute_optimal_reward,
            delta=DELTA,
            mu_inside=MU_BASE[0],
            mu_high=MU_HIGH)
        optimal_action_fn = functools.partial(
            environment_utilities.tf_wheel_bandit_compute_optimal_action,
            delta=DELTA)

        if FLAGS.agent == 'LinUCB':
            agent = lin_ucb_agent.LinearUCBAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                alpha=AGENT_ALPHA,
                dtype=tf.float32)
        elif FLAGS.agent == 'LinTS':
            agent = lin_ts_agent.LinearThompsonSamplingAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                alpha=AGENT_ALPHA,
                dtype=tf.float32)
        elif FLAGS.agent == 'epsGreedy':
            network = q_network.QNetwork(
                input_tensor_spec=environment.time_step_spec().observation,
                action_spec=environment.action_spec(),
                fc_layer_params=LAYERS)
            agent = eps_greedy_agent.NeuralEpsilonGreedyAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                reward_network=network,
                optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=LR),
                epsilon=EPSILON)
        else:
            # Without this branch `agent` stays unbound and the failure only
            # surfaces later as an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported agent %r; expected 'LinUCB', 'LinTS' or "
                "'epsGreedy'." % (FLAGS.agent,))

        regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)
        suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(
            optimal_action_fn)

        trainer.train(
            root_dir=FLAGS.root_dir,
            agent=agent,
            environment=environment,
            training_loops=TRAINING_LOOPS,
            steps_per_loop=STEPS_PER_LOOP,
            additional_metrics=[regret_metric, suboptimal_arms_metric])
Beispiel #13
0
 def export_saved_model(self, **kwargs):
   """Build a serving driver for this model and export it for inference.

   Extra keyword arguments are forwarded to `inference.ServingDriver`. The
   export targets are `self.saved_model_dir`, `self.tflite_path` and
   `self.tensorrt`.
   """
   tf.enable_resource_variables()
   model_params = self.model_config.as_dict()
   serving_driver = inference.ServingDriver(
       self.model_name,
       self.ckpt_path,
       batch_size=self.batch_size,
       use_xla=self.use_xla,
       model_params=model_params,
       **kwargs)
   serving_driver.build()
   serving_driver.export(
       self.saved_model_dir, self.tflite_path, self.tensorrt)
def main(_):
    """Enable TF2-style behaviour, load the gin configuration and train.

    The positional argument is ignored. Gin files and bindings come from
    `FLAGS.gin_file` and `FLAGS.gin_bindings`; unknown configurables are
    skipped while parsing.
    """
    logging.set_verbosity(logging.INFO)

    tf.enable_v2_behavior()
    tf.enable_resource_variables()
    tf.enable_control_flow_v2()
    logging.info('Executing eagerly: %s', tf.executing_eagerly())

    gin_files = FLAGS.gin_file
    logging.info('parsing config files: %s', gin_files)
    gin.parse_config_files_and_bindings(
        gin_files, FLAGS.gin_bindings, skip_unknown=True)

    # NOTE(review): `root_dir` and `metrics_callback` are not defined in this
    # function; presumably module-level names -- confirm against the full file.
    trainer.train(root_dir, eval_metrics_callback=metrics_callback)
 def setUp(self):
   """Create the specs, networks and optimizer shared by the test cases."""
   super(CategoricalDqnAgentTest, self).setUp()
   tf.enable_resource_variables()

   # Observation: float32 vector of length 2. Action: scalar int32 in [0, 1].
   obs_spec = tensor_spec.TensorSpec([2], tf.float32)
   action_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 1)

   self._obs_spec = obs_spec
   self._time_step_spec = ts.time_step_spec(obs_spec)
   self._action_spec = action_spec
   self._categorical_net = categorical_q_network.CategoricalQNetwork(
       obs_spec, action_spec, fc_layer_params=[4])
   self._dummy_categorical_net = DummyCategoricalNet(obs_spec)
   self._optimizer = tf.train.GradientDescentOptimizer(0.01)
Beispiel #16
0
def main(unused_argv):
    """Train a linear bandit agent on a drifting linear environment.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If `FLAGS.agent` is neither 'LinUCB' nor 'LinTS'.
    """
    tf.enable_resource_variables()

    with tf.device('/CPU:0'):  # due to b/128333994
        # Distributions that parameterise the non-stationary environment.
        observation_shape = [CONTEXT_DIM]
        overall_shape = [BATCH_SIZE] + observation_shape
        observation_distribution = tfd.Normal(loc=tf.zeros(overall_shape),
                                              scale=tf.ones(overall_shape))
        action_shape = [NUM_ACTIONS]
        observation_to_reward_shape = observation_shape + action_shape
        observation_to_reward_distribution = tfd.Normal(
            loc=tf.zeros(observation_to_reward_shape),
            scale=tf.ones(observation_to_reward_shape))
        drift_distribution = tfd.Normal(loc=DRIFT_MEAN, scale=DRIFT_VARIANCE)
        additive_reward_distribution = tfd.Normal(
            loc=tf.zeros(action_shape),
            scale=(REWARD_NOISE_VARIANCE * tf.ones(action_shape)))
        environment_dynamics = dle.DriftingLinearDynamics(
            observation_distribution, observation_to_reward_distribution,
            drift_distribution, additive_reward_distribution)
        environment = nse.NonStationaryStochasticEnvironment(
            environment_dynamics)

        if FLAGS.agent == 'LinUCB':
            agent = lin_ucb_agent.LinearUCBAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                alpha=AGENT_ALPHA,
                gamma=0.95,
                emit_log_probability=False,
                dtype=tf.float32)
        elif FLAGS.agent == 'LinTS':
            agent = lin_ts_agent.LinearThompsonSamplingAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                alpha=AGENT_ALPHA,
                gamma=0.95,
                dtype=tf.float32)
        else:
            # Without this branch `agent` stays unbound and the failure only
            # surfaces later as an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported agent %r; expected 'LinUCB' or 'LinTS'."
                % (FLAGS.agent,))

        regret_metric = tf_bandit_metrics.RegretMetric(
            environment.environment_dynamics.compute_optimal_reward)
        suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(
            environment.environment_dynamics.compute_optimal_action)

        trainer.train(
            root_dir=FLAGS.root_dir,
            agent=agent,
            environment=environment,
            training_loops=TRAINING_LOOPS,
            steps_per_loop=STEPS_PER_LOOP,
            additional_metrics=[regret_metric, suboptimal_arms_metric])
def main(unused_argv):
    """Train a linear bandit agent on a stationary stochastic environment.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If `FLAGS.agent` is neither 'LinUCB' nor 'LinTS'.
    """
    tf.enable_resource_variables()

    with tf.device('/CPU:0'):  # due to b/128333994
        action_reward_fns = (
            environment_utilities.sliding_linear_reward_fn_generator(
                CONTEXT_DIM, NUM_ACTIONS, REWARD_NOISE_VARIANCE))

        context_fn = functools.partial(
            environment_utilities.context_sampling_fn,
            batch_size=BATCH_SIZE,
            context_dim=CONTEXT_DIM)
        env = sspe.StationaryStochasticPyEnvironment(
            context_fn, action_reward_fns, batch_size=BATCH_SIZE)
        environment = tf_py_environment.TFPyEnvironment(env)

        # Reference functions used by the regret / suboptimal-arm metrics.
        optimal_reward_fn = functools.partial(
            environment_utilities.tf_compute_optimal_reward,
            per_action_reward_fns=action_reward_fns)

        optimal_action_fn = functools.partial(
            environment_utilities.tf_compute_optimal_action,
            per_action_reward_fns=action_reward_fns)

        if FLAGS.agent == 'LinUCB':
            agent = lin_ucb_agent.LinearUCBAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                alpha=AGENT_ALPHA,
                dtype=tf.float32)
        elif FLAGS.agent == 'LinTS':
            agent = lin_ts_agent.LinearThompsonSamplingAgent(
                time_step_spec=environment.time_step_spec(),
                action_spec=environment.action_spec(),
                alpha=AGENT_ALPHA,
                dtype=tf.float32)
        else:
            # Without this branch `agent` stays unbound and the failure only
            # surfaces later as an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported agent %r; expected 'LinUCB' or 'LinTS'."
                % (FLAGS.agent,))

        regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)
        suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(
            optimal_action_fn)

        trainer.train(
            root_dir=FLAGS.root_dir,
            agent=agent,
            environment=environment,
            training_loops=TRAINING_LOOPS,
            steps_per_loop=STEPS_PER_LOOP,
            additional_metrics=[regret_metric, suboptimal_arms_metric])
Beispiel #18
0
    def __init__(self, ncf_dataset, params):
        """Build the train and eval models in a private graph and open a session.

        Args:
            ncf_dataset: Stored on the instance as `_ncf_dataset`.
            params: Mapping read for "use_xla_for_gpu" and passed to
                `_build_model`.
        """
        graph = tf.Graph()
        self._graph = graph
        with graph.as_default():
            if params["use_xla_for_gpu"]:
                # The XLA functions we use require resource variables.
                tf.enable_resource_variables()
            self._ncf_dataset = ncf_dataset
            self._global_step = tf.train.create_global_step()
            self._train_model_properties = self._build_model(
                params, is_training=True)
            self._eval_model_properties = self._build_model(
                params, is_training=False)

            init_op = tf.global_variables_initializer()

        # Finalize the graph before the session starts running it.
        graph.finalize()
        self._session = tf.Session(graph=graph)
        self._session.run(init_op)
    def benchmark_graph(self):
        """Benchmark graph-mode training for several sample sizes.

        For each sample size a fresh graph is built, warmed up for
        `n_warmup_iters` steps, then timed over `n_iters` steps. The mean
        wall time per step and examples per second are reported through
        `self.report_benchmark`.
        """
        hparams = get_default_hparams()
        tf.enable_resource_variables()

        for sample_size in (10, 25, 50, 100, 200):
            hparams.n_samples = sample_size
            tf.reset_default_graph()
            with tf.Graph().as_default():
                energy_fn, _, _ = l2hmc.get_scg_energy_fn()
                samples = tf.random_normal(
                    [hparams.n_samples, hparams.x_dim], dtype=tf.float32)
                dynamics = l2hmc.Dynamics(
                    x_dim=hparams.x_dim,
                    minus_loglikelihood_fn=energy_fn,
                    n_steps=hparams.n_steps,
                    eps=hparams.eps)
                # The loss is built once here; its outputs are discarded and
                # the loss tensor from graph_step below is the one that runs.
                _, _, _ = l2hmc.compute_loss(dynamics, samples)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)
                train_op, loss, _ = graph_step(dynamics, optimizer, samples)
                fetches = [train_op, loss]

                # Single thread; fairer comparison against eager
                single_thread_conf = tf.ConfigProto(
                    inter_op_parallelism_threads=1)

                with tf.Session(config=single_thread_conf) as session:
                    session.run(tf.global_variables_initializer())

                    # Warmup to reduce initialization effect when timing
                    for _ in range(hparams.n_warmup_iters):
                        _, _ = session.run(fetches)

                    # Timed training steps
                    started_at = time.time()
                    for step in range(hparams.n_iters):
                        _, loss_value = session.run(fetches)
                        print("Iteration %d: loss %.4f" % (step, loss_value))
                    wall_time = (time.time() - started_at) / hparams.n_iters
                    examples_per_sec = hparams.n_samples / wall_time

                    device = "gpu" if tf.test.is_gpu_available() else "cpu"
                    self.report_benchmark(
                        name="graph_train_%s_%d" % (device, sample_size),
                        iters=hparams.n_iters,
                        extras={"examples_per_sec": examples_per_sec},
                        wall_time=wall_time)
Beispiel #20
0
  def benchmark_graph(self):
    """Time graph-mode training steps across a range of sample sizes.

    Each sample size gets its own graph and session. After `n_warmup_iters`
    untimed steps, `n_iters` steps are timed and the per-step wall time plus
    examples per second are passed to `self.report_benchmark`.
    """
    hparams = get_default_hparams()
    tf.enable_resource_variables()

    sample_sizes = [10, 25, 50, 100, 200]
    for sample_size in sample_sizes:
      hparams.n_samples = sample_size
      tf.reset_default_graph()
      with tf.Graph().as_default():
        energy_fn, _, _ = l2hmc.get_scg_energy_fn()
        batch_shape = [hparams.n_samples, hparams.x_dim]
        x = tf.random_normal(batch_shape, dtype=tf.float32)
        dynamics = l2hmc.Dynamics(
            x_dim=hparams.x_dim,
            minus_loglikelihood_fn=energy_fn,
            n_steps=hparams.n_steps,
            eps=hparams.eps)
        # Built once up front; the tensors returned here are not run.
        _, _, _ = l2hmc.compute_loss(dynamics, x)

        adam = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
        train_op, loss, _ = graph_step(dynamics, adam, x)

        # Single thread; fairer comparison against eager
        config = tf.ConfigProto(inter_op_parallelism_threads=1)

        with tf.Session(config=config) as sess:
          sess.run(tf.global_variables_initializer())

          # Warmup to reduce initialization effect when timing
          warmup_steps = hparams.n_warmup_iters
          for _ in range(warmup_steps):
            _, _ = sess.run([train_op, loss])

          # Timed training steps
          t_start = time.time()
          for iteration in range(hparams.n_iters):
            _, loss_np = sess.run([train_op, loss])
            print("Iteration %d: loss %.4f" % (iteration, loss_np))
          elapsed = time.time() - t_start
          wall_time = elapsed / hparams.n_iters
          examples_per_sec = hparams.n_samples / wall_time

          if tf.test.is_gpu_available():
            device_tag = "gpu"
          else:
            device_tag = "cpu"
          self.report_benchmark(
              name="graph_train_%s_%d" % (device_tag, sample_size),
              iters=hparams.n_iters,
              extras={"examples_per_sec": examples_per_sec},
              wall_time=wall_time)
Beispiel #21
0
  def __init__(self, ncf_dataset, params, num_train_steps, num_eval_steps,
               use_while_loop):
    """Build the train and eval models in a private graph and open a session.

    Args:
      ncf_dataset: Stored on the instance as `_ncf_dataset`.
      params: Mapping read for "use_xla_for_gpu" and passed to `_build_model`.
      num_train_steps: Stored and passed to the training `_build_model` call.
      num_eval_steps: Stored and passed to the evaluation `_build_model` call.
      use_while_loop: Stored on the instance as `_use_while_loop`.
    """
    self._num_train_steps = num_train_steps
    self._num_eval_steps = num_eval_steps
    self._use_while_loop = use_while_loop

    graph = tf.Graph()
    self._graph = graph
    with graph.as_default():
      if params["use_xla_for_gpu"]:
        # The XLA functions we use require resource variables.
        tf.enable_resource_variables()
      self._ncf_dataset = ncf_dataset
      self._global_step = tf.train.create_global_step()
      self._train_model_properties = self._build_model(
          params, num_train_steps, is_training=True)
      self._eval_model_properties = self._build_model(
          params, num_eval_steps, is_training=False)

      init_op = tf.global_variables_initializer()

    # Finalize the graph before the session starts running it.
    graph.finalize()
    self._session = tf.Session(graph=graph)
    self._session.run(init_op)
Beispiel #22
0
def main(argv):
    """Configure K-FAC global constants and train a `TPUEstimator`.

    Args:
        argv: Ignored.
    """
    del argv  # Unused.

    # If using update_damping_immediately resource variables must be enabled.
    # (Although they probably will be by default on TPUs.)
    if FLAGS.update_damping_immediately:
        tf.enable_resource_variables()

    tf.set_random_seed(FLAGS.seed)

    # Invert using cholesky decomposition + triangular solve.  This is the only
    # code path for matrix inversion supported on TPU right now.
    kfac.utils.set_global_constants(posdef_inv_method='cholesky')
    kfac.fisher_factors.set_global_constants(
        eigenvalue_decomposition_threshold=10000)

    if not FLAGS.use_sua_approx:
        if FLAGS.use_custom_patches_op:
            patch_constants = dict(use_patches_second_moment_op=True)
        else:
            # Temporary measure to save memory with giant batches:
            patch_constants = dict(sub_sample_inputs=True,
                                   inputs_to_extract_patches_factor=0.1)
        kfac.fisher_factors.set_global_constants(**patch_constants)

    run_config = make_tpu_run_config(
        FLAGS.master,
        FLAGS.seed,
        FLAGS.model_dir,
        FLAGS.iterations_per_loop,
        FLAGS.save_checkpoints_steps)

    tpu_estimator = contrib_tpu.TPUEstimator(
        use_tpu=True,
        model_fn=_model_fn,
        config=run_config,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=1024)

    tpu_estimator.train(
        input_fn=mnist_input_fn,
        max_steps=FLAGS.train_steps,
        hooks=[])
def main(_):
  # causes memory fragmentation for bert leading to OOM
  if os.environ.get("TF_XLA_FLAGS", None) is not None:
    os.environ["TF_XLA_FLAGS"] += " --tf_xla_enable_lazy_compilation false"
  else:
    os.environ["TF_XLA_FLAGS"] = " --tf_xla_enable_lazy_compilation false"

  # Enable async_io to speed up multi-gpu training with XLA and Horovod.
  os.environ["TF_XLA_FLAGS"] += " --tf_xla_async_io_level 1"

  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
  dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path)

  if FLAGS.horovod:
    hvd.init()

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  validate_flags_or_throw(bert_config)

  tf.io.gfile.makedirs(FLAGS.output_dir)

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  master_process = True
  training_hooks = []
  global_batch_size = FLAGS.train_batch_size * FLAGS.num_accumulation_steps
  hvd_rank = 0

  config = tf.compat.v1.ConfigProto()
  learning_rate = FLAGS.learning_rate
  if FLAGS.horovod:

      tf.compat.v1.logging.info("Multi-GPU training with TF Horovod")
      tf.compat.v1.logging.info("hvd.size() = %d hvd.rank() = %d", hvd.size(), hvd.rank())
      global_batch_size = FLAGS.train_batch_size * hvd.size() * FLAGS.num_accumulation_steps
      learning_rate = learning_rate * hvd.size()
      master_process = (hvd.rank() == 0)
      hvd_rank = hvd.rank()
      config.gpu_options.visible_device_list = str(hvd.local_rank())
      if hvd.size() > 1:
          training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
  if FLAGS.use_xla:
    config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
    if FLAGS.amp:
        tf.enable_resource_variables()

  run_config = tf.estimator.RunConfig(
      model_dir=FLAGS.output_dir if master_process else None,
      session_config=config,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps if master_process else None,
      save_summary_steps=FLAGS.save_checkpoints_steps if master_process else None,
      log_step_count_steps=FLAGS.display_loss_steps,
      keep_checkpoint_max=1)

  if master_process:
      tf.compat.v1.logging.info("***** Configuaration *****")
      for key in FLAGS.__flags.keys():
          tf.compat.v1.logging.info('  {}: {}'.format(key, getattr(FLAGS, key)))
      tf.compat.v1.logging.info("**************************")

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  training_hooks.append(LogTrainRunHook(global_batch_size, hvd_rank, FLAGS.save_checkpoints_steps))

  # Prepare Training Data
  if FLAGS.do_train:
    train_examples = read_squad_examples(
        input_file=FLAGS.train_file, is_training=True,
        version_2_with_negative=FLAGS.version_2_with_negative)
    num_train_steps = int(
        len(train_examples) / global_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # Pre-shuffle the input to avoid having to make a very large shuffle
    # buffer in in the `input_fn`.
    rng = random.Random(12345)
    rng.shuffle(train_examples)

    start_index = 0 
    end_index = len(train_examples)
    tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record")]

    if FLAGS.horovod:
      tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record{}".format(i)) for i in range(hvd.size())]
      num_examples_per_rank = len(train_examples) // hvd.size()
      remainder = len(train_examples) % hvd.size()
      if hvd.rank() < remainder:
        start_index = hvd.rank() * (num_examples_per_rank+1)
        end_index = start_index + num_examples_per_rank + 1
      else:
        start_index = hvd.rank() * num_examples_per_rank + remainder
        end_index = start_index + (num_examples_per_rank)


  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      hvd=None if not FLAGS.horovod else hvd,
      amp=FLAGS.amp)

  estimator = tf.estimator.Estimator(
      model_fn=model_fn,
      config=run_config)

  if FLAGS.do_train:

    # We write to a temporary file to avoid storing very large constant tensors
    # in memory.
    train_writer = FeatureWriter(
        filename=tmp_filenames[hvd_rank],
        is_training=True)
    convert_examples_to_features(
        examples=train_examples[start_index:end_index],
        tokenizer=tokenizer,
        max_seq_length=FLAGS.max_seq_length,
        doc_stride=FLAGS.doc_stride,
        max_query_length=FLAGS.max_query_length,
        is_training=True,
        output_fn=train_writer.process_feature,
        verbose_logging=FLAGS.verbose_logging)
    train_writer.close()

    tf.compat.v1.logging.info("***** Running training *****")
    tf.compat.v1.logging.info("  Num orig examples = %d", end_index - start_index)
    tf.compat.v1.logging.info("  Num split examples = %d", train_writer.num_features)
    tf.compat.v1.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.compat.v1.logging.info("  Num steps = %d", num_train_steps)
    tf.compat.v1.logging.info("  LR = %f", learning_rate)
    del train_examples

    train_input_fn = input_fn_builder(
        input_file=tmp_filenames,
        batch_size=FLAGS.train_batch_size,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True,
        hvd=None if not FLAGS.horovod else hvd)

    train_start_time = time.time()
    estimator.train(input_fn=train_input_fn, hooks=training_hooks, max_steps=num_train_steps)
    train_time_elapsed = time.time() - train_start_time
    train_time_wo_overhead = training_hooks[-1].total_time
    avg_sentences_per_second = num_train_steps * global_batch_size * 1.0 / train_time_elapsed
    ss_sentences_per_second = (num_train_steps - training_hooks[-1].skipped) * global_batch_size * 1.0 / train_time_wo_overhead

    if master_process:
        tf.compat.v1.logging.info("-----------------------------")
        tf.compat.v1.logging.info("Total Training Time = %0.2f for Sentences = %d", train_time_elapsed,
                        num_train_steps * global_batch_size)
        tf.compat.v1.logging.info("Total Training Time W/O Overhead = %0.2f for Sentences = %d", train_time_wo_overhead,
                        (num_train_steps - training_hooks[-1].skipped) * global_batch_size)
        tf.compat.v1.logging.info("Throughput Average (sentences/sec) with overhead = %0.2f", avg_sentences_per_second)
        tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second)
        dllogging.logger.log(step=(), data={"throughput_train": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT)
        tf.compat.v1.logging.info("-----------------------------")


  if FLAGS.export_triton and master_process:
    export_model(estimator, FLAGS.output_dir, FLAGS.init_checkpoint)

  if FLAGS.do_predict and master_process:
    eval_examples = read_squad_examples(
        input_file=FLAGS.predict_file, is_training=False,
        version_2_with_negative=FLAGS.version_2_with_negative)

    # Perform evaluation on subset, useful for profiling
    if FLAGS.num_eval_iterations is not None:
        eval_examples = eval_examples[:FLAGS.num_eval_iterations*FLAGS.predict_batch_size]

    eval_writer = FeatureWriter(
        filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
        is_training=False)
    eval_features = []

    def append_feature(feature):
      eval_features.append(feature)
      eval_writer.process_feature(feature)

    convert_examples_to_features(
        examples=eval_examples,
        tokenizer=tokenizer,
        max_seq_length=FLAGS.max_seq_length,
        doc_stride=FLAGS.doc_stride,
        max_query_length=FLAGS.max_query_length,
        is_training=False,
        output_fn=append_feature,
        verbose_logging=FLAGS.verbose_logging)
    eval_writer.close()

    tf.compat.v1.logging.info("***** Running predictions *****")
    tf.compat.v1.logging.info("  Num orig examples = %d", len(eval_examples))
    tf.compat.v1.logging.info("  Num split examples = %d", len(eval_features))
    tf.compat.v1.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_input_fn = input_fn_builder(
        input_file=eval_writer.filename,
        batch_size=FLAGS.predict_batch_size,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    all_results = []
    eval_hooks = [LogEvalRunHook(FLAGS.predict_batch_size)]
    eval_start_time = time.time()
    for result in estimator.predict(
        predict_input_fn, yield_single_examples=True, hooks=eval_hooks):
      if len(all_results) % 1000 == 0:
        tf.compat.v1.logging.info("Processing example: %d" % (len(all_results)))
      unique_id = int(result["unique_ids"])
      start_logits = [float(x) for x in result["start_logits"].flat]
      end_logits = [float(x) for x in result["end_logits"].flat]
      all_results.append(
          RawResult(
              unique_id=unique_id,
              start_logits=start_logits,
              end_logits=end_logits))

    eval_time_elapsed = time.time() - eval_start_time

    time_list = eval_hooks[-1].time_list
    time_list.sort()
    # Removing outliers (init/warmup) in throughput computation.
    eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)])
    num_sentences = (int(len(time_list) * 0.99)) * FLAGS.predict_batch_size

    avg = np.mean(time_list)
    cf_50 = max(time_list[:int(len(time_list) * 0.50)])
    cf_90 = max(time_list[:int(len(time_list) * 0.90)])
    cf_95 = max(time_list[:int(len(time_list) * 0.95)])
    cf_99 = max(time_list[:int(len(time_list) * 0.99)])
    cf_100 = max(time_list[:int(len(time_list) * 1)])
    ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead

    tf.compat.v1.logging.info("-----------------------------")
    tf.compat.v1.logging.info("Total Inference Time = %0.2f for Sentences = %d", eval_time_elapsed,
                    eval_hooks[-1].count * FLAGS.predict_batch_size)
    tf.compat.v1.logging.info("Total Inference Time W/O Overhead = %0.2f for Sentences = %d", eval_time_wo_overhead,
                    num_sentences)
    tf.compat.v1.logging.info("Summary Inference Statistics")
    tf.compat.v1.logging.info("Batch size = %d", FLAGS.predict_batch_size)
    tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length)
    tf.compat.v1.logging.info("Precision = %s", "fp16" if FLAGS.amp else "fp32")
    tf.compat.v1.logging.info("Latency Confidence Level 50 (ms) = %0.2f", cf_50 * 1000)
    tf.compat.v1.logging.info("Latency Confidence Level 90 (ms) = %0.2f", cf_90 * 1000)
    tf.compat.v1.logging.info("Latency Confidence Level 95 (ms) = %0.2f", cf_95 * 1000)
    tf.compat.v1.logging.info("Latency Confidence Level 99 (ms) = %0.2f", cf_99 * 1000)
    tf.compat.v1.logging.info("Latency Confidence Level 100 (ms) = %0.2f", cf_100 * 1000)
    tf.compat.v1.logging.info("Latency Average (ms) = %0.2f", avg * 1000)
    tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second)
    dllogging.logger.log(step=(), data={"throughput_val": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT)
    tf.compat.v1.logging.info("-----------------------------")

    output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json")
    output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json")
    output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json")

    write_predictions(eval_examples, eval_features, all_results,
                      FLAGS.n_best_size, FLAGS.max_answer_length,
                      FLAGS.do_lower_case, output_prediction_file,
                      output_nbest_file, output_null_log_odds_file,
                      FLAGS.version_2_with_negative, FLAGS.verbose_logging)

    if FLAGS.eval_script:
        import sys
        import subprocess
        eval_out = subprocess.check_output([sys.executable, FLAGS.eval_script,
                                          FLAGS.predict_file, output_prediction_file])
        scores = str(eval_out).strip()
        exact_match = float(scores.split(":")[1].split(",")[0])
        f1 = float(scores.split(":")[2].split("}")[0])
        dllogging.logger.log(step=(), data={"f1": f1}, verbosity=Verbosity.DEFAULT)
        dllogging.logger.log(step=(), data={"exact_match": exact_match}, verbosity=Verbosity.DEFAULT)
        print(str(eval_out))
Beispiel #24
0
def main(_):
    """Fine-tune, evaluate and/or run prediction for a BERT NER model.

    The stages to run are selected with ``FLAGS.do_train``, ``FLAGS.do_eval``
    and ``FLAGS.do_predict``. With ``FLAGS.horovod`` set, every rank converts
    and writes its own shard of the training examples, while evaluation and
    prediction only run on the master process (rank 0).

    Args:
        _: Unused.

    Raises:
        ValueError: If none of the three stage flags is set, if
            ``FLAGS.max_seq_length`` exceeds the model's
            ``max_position_embeddings``, or if ``FLAGS.task_name`` has no
            registered processor.
    """
    # Lazy XLA compilation causes memory fragmentation for BERT, leading to
    # OOM, so it is switched off before any TF work starts.
    os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_lazy_compilation=false"

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
    dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path)

    if FLAGS.horovod:
        hvd.init()

    processors = {
        "bc5cdr": BC5CDRProcessor,
        "clefe": CLEFEProcessor,
        'i2b2': I2b22012Processor
    }
    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        # The message now names all three flags that the condition checks.
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` must be "
            "True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    tf.io.gfile.makedirs(FLAGS.output_dir)

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    # NOTE(review): `is_per_host` is not read anywhere else in this function.
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    # Single-process defaults; overridden below when Horovod is enabled.
    master_process = True
    training_hooks = []
    global_batch_size = FLAGS.train_batch_size
    hvd_rank = 0

    config = tf.compat.v1.ConfigProto()
    if FLAGS.horovod:
        global_batch_size = FLAGS.train_batch_size * hvd.size()
        master_process = (hvd.rank() == 0)
        hvd_rank = hvd.rank()
        # Pin each process to the GPU matching its local rank.
        config.gpu_options.visible_device_list = str(hvd.local_rank())
        if hvd.size() > 1:
            training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))

    if FLAGS.use_xla:
        config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
        tf.enable_resource_variables()
    # Only the master process gets a model directory and checkpoint schedule.
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.output_dir if master_process else None,
        session_config=config,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps
        if master_process else None,
        keep_checkpoint_max=1)

    if master_process:
        tf.compat.v1.logging.info("***** Configuaration *****")
        for key in FLAGS.__flags.keys():
            tf.compat.v1.logging.info('  {}: {}'.format(
                key, getattr(FLAGS, key)))
        tf.compat.v1.logging.info("**************************")

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    # Must stay the LAST training hook: the timing code below reads
    # training_hooks[-1].total_time / .skipped.
    training_hooks.append(LogTrainRunHook(global_batch_size, hvd_rank))

    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / global_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        start_index = 0
        end_index = len(train_examples)
        tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record")]

        if FLAGS.horovod:
            # One record file per rank; the first `remainder` ranks take one
            # extra example so the shards cover all examples exactly once.
            tmp_filenames = [
                os.path.join(FLAGS.output_dir, "train.tf_record{}".format(i))
                for i in range(hvd.size())
            ]
            num_examples_per_rank = len(train_examples) // hvd.size()
            remainder = len(train_examples) % hvd.size()
            if hvd.rank() < remainder:
                start_index = hvd.rank() * (num_examples_per_rank + 1)
                end_index = start_index + num_examples_per_rank + 1
            else:
                start_index = hvd.rank() * num_examples_per_rank + remainder
                end_index = start_index + (num_examples_per_rank)

    # The learning rate is scaled by the number of workers under Horovod.
    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list) + 1,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate
                                if not FLAGS.horovod else FLAGS.learning_rate *
                                hvd.size(),
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_one_hot_embeddings=False,
                                hvd=None if not FLAGS.horovod else hvd,
                                use_fp16=FLAGS.use_fp16)

    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

    if FLAGS.do_train:
        # Each rank writes only its own slice to its own record file.
        filed_based_convert_examples_to_features(
            train_examples[start_index:end_index], label_list,
            FLAGS.max_seq_length, tokenizer, tmp_filenames[hvd_rank])
        tf.compat.v1.logging.info("***** Running training *****")
        tf.compat.v1.logging.info("  Num examples = %d", len(train_examples))
        tf.compat.v1.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.compat.v1.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=tmp_filenames,
            batch_size=FLAGS.train_batch_size,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            hvd=None if not FLAGS.horovod else hvd)

        train_start_time = time.time()
        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=training_hooks)
        train_time_elapsed = time.time() - train_start_time
        train_time_wo_overhead = training_hooks[-1].total_time
        avg_sentences_per_second = num_train_steps * global_batch_size * 1.0 / train_time_elapsed
        ss_sentences_per_second = (
            num_train_steps - training_hooks[-1].skipped
        ) * global_batch_size * 1.0 / train_time_wo_overhead

        if master_process:
            tf.compat.v1.logging.info("-----------------------------")
            tf.compat.v1.logging.info(
                "Total Training Time = %0.2f for Sentences = %d",
                train_time_elapsed, num_train_steps * global_batch_size)
            tf.compat.v1.logging.info(
                "Total Training Time W/O Overhead = %0.2f for Sentences = %d",
                train_time_wo_overhead,
                (num_train_steps - training_hooks[-1].skipped) *
                global_batch_size)
            tf.compat.v1.logging.info(
                "Throughput Average (sentences/sec) with overhead = %0.2f",
                avg_sentences_per_second)
            tf.compat.v1.logging.info(
                "Throughput Average (sentences/sec) = %0.2f",
                ss_sentences_per_second)
            dllogging.logger.log(
                step=(),
                data={"throughput_train": ss_sentences_per_second},
                verbosity=Verbosity.DEFAULT)
            tf.compat.v1.logging.info("-----------------------------")

    if FLAGS.do_eval and master_process:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        filed_based_convert_examples_to_features(eval_examples, label_list,
                                                 FLAGS.max_seq_length,
                                                 tokenizer, eval_file)

        tf.compat.v1.logging.info("***** Running evaluation *****")
        tf.compat.v1.logging.info("  Num examples = %d", len(eval_examples))
        tf.compat.v1.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        eval_steps = None
        eval_drop_remainder = False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            batch_size=FLAGS.eval_batch_size,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)
        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        # tf.io.gfile exposes GFile (not Open) for opening files.
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf.compat.v1.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.compat.v1.logging.info("  %s = %s", key, str(result[key]))
                # Fixed: previously read the undefined name `strresult`.
                dllogging.logger.log(step=(),
                                     data={key: float(result[key])},
                                     verbosity=Verbosity.DEFAULT)
                writer.write("%s = %s\n" % (key, str(result[key])))
    if FLAGS.do_predict and master_process:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        filed_based_convert_examples_to_features(predict_examples,
                                                 label_list,
                                                 FLAGS.max_seq_length,
                                                 tokenizer,
                                                 predict_file,
                                                 mode="test")

        # NOTE(review): pickle.load can execute arbitrary code on a crafted
        # file. label2id.pkl is presumably written by the feature-conversion
        # helper above -- confirm FLAGS.output_dir is a trusted location.
        with tf.io.gfile.GFile(os.path.join(FLAGS.output_dir, 'label2id.pkl'),
                               'rb') as rf:
            label2id = pickle.load(rf)
            id2label = {value: key for key, value in label2id.items()}
        # Remove a stale token file from a previous run, if present.
        token_path = os.path.join(FLAGS.output_dir, "token_test.txt")
        if tf.io.gfile.exists(token_path):
            tf.io.gfile.remove(token_path)

        tf.compat.v1.logging.info("***** Running prediction*****")
        tf.compat.v1.logging.info("  Num examples = %d", len(predict_examples))
        tf.compat.v1.logging.info("  Batch size = %d",
                                  FLAGS.predict_batch_size)

        predict_drop_remainder = False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            batch_size=FLAGS.predict_batch_size,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        eval_hooks = [LogEvalRunHook(FLAGS.predict_batch_size)]
        eval_start_time = time.time()

        output_predict_file = os.path.join(FLAGS.output_dir, "label_test.txt")
        test_labels_file = os.path.join(FLAGS.output_dir, "test_labels.txt")
        test_labels_err_file = os.path.join(FLAGS.output_dir,
                                            "test_labels_errs.txt")
        with tf.io.gfile.GFile(output_predict_file, 'w') as writer, \
                tf.io.gfile.GFile(test_labels_file, 'w') as tl, \
                tf.io.gfile.GFile(test_labels_err_file, 'w') as tle:
            print(id2label)
            predictions = estimator.predict(input_fn=predict_input_fn,
                                            hooks=eval_hooks,
                                            yield_single_examples=True)
            for i, prediction in enumerate(predictions):
                # Label id 0 is skipped when writing the predicted labels.
                output_line = "\n".join(
                    id2label[label_id]
                    for label_id in prediction if label_id != 0) + "\n"
                writer.write(output_line)
                result_to_pair(predict_examples[i], prediction, id2label, tl,
                               tle)

        eval_time_elapsed = time.time() - eval_start_time

        time_list = eval_hooks[-1].time_list
        time_list.sort()
        # Removing outliers (init/warmup) in throughput computation: the list
        # is sorted ascending, so the slowest 1% of entries are dropped.
        eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)])
        num_sentences = (int(len(time_list) * 0.99)) * FLAGS.predict_batch_size

        # NOTE: max() raises ValueError when a slice is empty, i.e. when
        # fewer than two timings were recorded.
        avg = np.mean(time_list)
        cf_50 = max(time_list[:int(len(time_list) * 0.50)])
        cf_90 = max(time_list[:int(len(time_list) * 0.90)])
        cf_95 = max(time_list[:int(len(time_list) * 0.95)])
        cf_99 = max(time_list[:int(len(time_list) * 0.99)])
        cf_100 = max(time_list[:int(len(time_list) * 1)])
        ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead

        tf.compat.v1.logging.info("-----------------------------")
        tf.compat.v1.logging.info(
            "Total Inference Time = %0.2f for Sentences = %d",
            eval_time_elapsed, eval_hooks[-1].count * FLAGS.predict_batch_size)
        tf.compat.v1.logging.info(
            "Total Inference Time W/O Overhead = %0.2f for Sentences = %d",
            eval_time_wo_overhead, num_sentences)
        tf.compat.v1.logging.info("Summary Inference Statistics")
        tf.compat.v1.logging.info("Batch size = %d", FLAGS.predict_batch_size)
        tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length)
        tf.compat.v1.logging.info("Precision = %s",
                                  "fp16" if FLAGS.use_fp16 else "fp32")
        tf.compat.v1.logging.info("Latency Confidence Level 50 (ms) = %0.2f",
                                  cf_50 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 90 (ms) = %0.2f",
                                  cf_90 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 95 (ms) = %0.2f",
                                  cf_95 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 99 (ms) = %0.2f",
                                  cf_99 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 100 (ms) = %0.2f",
                                  cf_100 * 1000)
        tf.compat.v1.logging.info("Latency Average (ms) = %0.2f", avg * 1000)
        tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f",
                                  ss_sentences_per_second)
        dllogging.logger.log(step=(),
                             data={"throughput_val": ss_sentences_per_second},
                             verbosity=Verbosity.DEFAULT)
        tf.compat.v1.logging.info("-----------------------------")

        # Score the written label file and persist the report.
        tf.compat.v1.logging.info('Reading: %s', test_labels_file)
        with tf.io.gfile.GFile(test_labels_file, "r") as f:
            counts = evaluate(f)
        eval_result = report_notprint(counts)
        print(''.join(eval_result))
        with tf.io.gfile.GFile(
                os.path.join(FLAGS.output_dir, 'test_results_conlleval.txt'),
                'w') as fd:
            fd.write(''.join(eval_result))
Beispiel #25
0
def main(_):
    """Configure logging and resource variables, then run Atari TrainEval.

    Args:
        _: Unused.
    """
    logging.set_verbosity(logging.INFO)
    tf.enable_resource_variables()

    # Gather the constructor inputs in the same order the original call
    # evaluated them, then build and run the trainer.
    root_dir = FLAGS.root_dir
    game_id = suite_atari.game(name=FLAGS.game_name)
    run_kwargs = get_run_args()
    trainer = TrainEval(root_dir, game_id, **run_kwargs)
    trainer.run()
def main(_):
    """Fine-tune, evaluate and/or run prediction for a BERT classifier.

    The stages to run are selected with ``FLAGS.do_train``, ``FLAGS.do_eval``
    and ``FLAGS.do_predict``. With ``FLAGS.horovod`` set, every rank converts
    and writes its own shard of the training examples, while evaluation and
    prediction only run on the master process (rank 0).

    Args:
        _: Unused.

    Raises:
        ValueError: If none of the three stage flags is set, if
            ``FLAGS.max_seq_length`` exceeds the model's
            ``max_position_embeddings``, or if ``FLAGS.task_name`` has no
            registered processor.
    """
    # Lazy XLA compilation causes memory fragmentation for BERT, leading to
    # OOM. Flags in TF_XLA_FLAGS are whitespace-separated, so when the
    # variable is already set the new flag must be appended after a space;
    # otherwise it fuses with the previous flag and corrupts both.
    xla_flag = "--tf_xla_enable_lazy_compilation=false"
    existing_xla_flags = os.environ.get("TF_XLA_FLAGS", None)
    if existing_xla_flags is not None:
        os.environ["TF_XLA_FLAGS"] = existing_xla_flags + " " + xla_flag
    else:
        os.environ["TF_XLA_FLAGS"] = xla_flag

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
    dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path)

    if FLAGS.horovod:
        hvd.init()

    processors = {
        "chemprot": BioBERTChemprotProcessor,
        'mednli': MedNLIProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.io.gfile.makedirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    # NOTE(review): `is_per_host` is not read anywhere else in this function.
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    # Single-process defaults; overridden below when Horovod is enabled.
    master_process = True
    training_hooks = []
    global_batch_size = FLAGS.train_batch_size
    hvd_rank = 0

    config = tf.compat.v1.ConfigProto()
    if FLAGS.horovod:
        global_batch_size = FLAGS.train_batch_size * hvd.size()
        master_process = (hvd.rank() == 0)
        hvd_rank = hvd.rank()
        # Pin each process to the GPU matching its local rank.
        config.gpu_options.visible_device_list = str(hvd.local_rank())
        if hvd.size() > 1:
            training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))

    if FLAGS.use_xla:
        config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
        tf.enable_resource_variables()
    # Only the master process gets a model directory and checkpoint schedule.
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.output_dir if master_process else None,
        session_config=config,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps
        if master_process else None,
        keep_checkpoint_max=1)

    if master_process:
        tf.compat.v1.logging.info("***** Configuaration *****")
        for key in FLAGS.__flags.keys():
            tf.compat.v1.logging.info('  {}: {}'.format(
                key, getattr(FLAGS, key)))
        tf.compat.v1.logging.info("**************************")

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None

    # Must stay the LAST training hook: the timing code below reads
    # training_hooks[-1].total_time / .skipped.
    training_hooks.append(LogTrainRunHook(global_batch_size, hvd_rank))

    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / global_batch_size * FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        start_index = 0
        end_index = len(train_examples)
        tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record")]

        if FLAGS.horovod:
            # One record file per rank; the first `remainder` ranks take one
            # extra example so the shards cover all examples exactly once.
            tmp_filenames = [
                os.path.join(FLAGS.output_dir, "train.tf_record{}".format(i))
                for i in range(hvd.size())
            ]
            num_examples_per_rank = len(train_examples) // hvd.size()
            remainder = len(train_examples) % hvd.size()
            if hvd.rank() < remainder:
                start_index = hvd.rank() * (num_examples_per_rank + 1)
                end_index = start_index + num_examples_per_rank + 1
            else:
                start_index = hvd.rank() * num_examples_per_rank + remainder
                end_index = start_index + (num_examples_per_rank)

    # The learning rate is scaled by the number of workers under Horovod.
    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate
                                if not FLAGS.horovod else FLAGS.learning_rate *
                                hvd.size(),
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_one_hot_embeddings=False,
                                hvd=None if not FLAGS.horovod else hvd,
                                amp=FLAGS.amp)

    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

    if FLAGS.do_train:
        # Each rank writes only its own slice to its own record file.
        file_based_convert_examples_to_features(
            train_examples[start_index:end_index], label_list,
            FLAGS.max_seq_length, tokenizer, tmp_filenames[hvd_rank])
        tf.compat.v1.logging.info("***** Running training *****")
        tf.compat.v1.logging.info("  Num examples = %d", len(train_examples))
        tf.compat.v1.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.compat.v1.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=tmp_filenames,
            batch_size=FLAGS.train_batch_size,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            hvd=None if not FLAGS.horovod else hvd)

        train_start_time = time.time()
        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=training_hooks)
        train_time_elapsed = time.time() - train_start_time
        train_time_wo_overhead = training_hooks[-1].total_time
        avg_sentences_per_second = num_train_steps * global_batch_size * 1.0 / train_time_elapsed
        ss_sentences_per_second = (
            num_train_steps - training_hooks[-1].skipped
        ) * global_batch_size * 1.0 / train_time_wo_overhead

        if master_process:
            tf.compat.v1.logging.info("-----------------------------")
            tf.compat.v1.logging.info(
                "Total Training Time = %0.2f for Sentences = %d",
                train_time_elapsed, num_train_steps * global_batch_size)
            tf.compat.v1.logging.info(
                "Total Training Time W/O Overhead = %0.2f for Sentences = %d",
                train_time_wo_overhead,
                (num_train_steps - training_hooks[-1].skipped) *
                global_batch_size)
            tf.compat.v1.logging.info(
                "Throughput Average (sentences/sec) with overhead = %0.2f",
                avg_sentences_per_second)
            tf.compat.v1.logging.info(
                "Throughput Average (sentences/sec) = %0.2f",
                ss_sentences_per_second)
            dllogging.logger.log(
                step=(),
                data={"throughput_train": ss_sentences_per_second},
                verbosity=Verbosity.DEFAULT)
            tf.compat.v1.logging.info("-----------------------------")

    if FLAGS.do_eval and master_process:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.compat.v1.logging.info("***** Running evaluation *****")
        tf.compat.v1.logging.info(
            "  Num examples = %d (%d actual, %d padding)", len(eval_examples),
            num_actual_eval_examples,
            len(eval_examples) - num_actual_eval_examples)
        tf.compat.v1.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None

        eval_drop_remainder = False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            batch_size=FLAGS.eval_batch_size,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf.compat.v1.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.compat.v1.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict and master_process:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.compat.v1.logging.info("***** Running prediction*****")
        tf.compat.v1.logging.info(
            "  Num examples = %d (%d actual, %d padding)",
            len(predict_examples), num_actual_predict_examples,
            len(predict_examples) - num_actual_predict_examples)
        tf.compat.v1.logging.info("  Batch size = %d",
                                  FLAGS.predict_batch_size)

        predict_drop_remainder = False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            batch_size=FLAGS.predict_batch_size,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        eval_hooks = [LogEvalRunHook(FLAGS.predict_batch_size)]
        eval_start_time = time.time()

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        # One tab-separated line of class probabilities per example.
        with tf.io.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.compat.v1.logging.info("***** Predict results *****")
            for prediction in estimator.predict(input_fn=predict_input_fn,
                                                hooks=eval_hooks,
                                                yield_single_examples=True):
                probabilities = prediction["probabilities"]
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples

        eval_time_elapsed = time.time() - eval_start_time

        time_list = eval_hooks[-1].time_list
        time_list.sort()
        # Removing outliers (init/warmup) in throughput computation: the list
        # is sorted ascending, so the slowest 1% of entries are dropped.
        eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)])
        num_sentences = (int(len(time_list) * 0.99)) * FLAGS.predict_batch_size

        # NOTE: max() raises ValueError when a slice is empty, i.e. when
        # fewer than two timings were recorded.
        avg = np.mean(time_list)
        cf_50 = max(time_list[:int(len(time_list) * 0.50)])
        cf_90 = max(time_list[:int(len(time_list) * 0.90)])
        cf_95 = max(time_list[:int(len(time_list) * 0.95)])
        cf_99 = max(time_list[:int(len(time_list) * 0.99)])
        cf_100 = max(time_list[:int(len(time_list) * 1)])
        ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead

        tf.compat.v1.logging.info("-----------------------------")
        tf.compat.v1.logging.info(
            "Total Inference Time = %0.2f for Sentences = %d",
            eval_time_elapsed, eval_hooks[-1].count * FLAGS.predict_batch_size)
        tf.compat.v1.logging.info(
            "Total Inference Time W/O Overhead = %0.2f for Sentences = %d",
            eval_time_wo_overhead, num_sentences)
        tf.compat.v1.logging.info("Summary Inference Statistics")
        tf.compat.v1.logging.info("Batch size = %d", FLAGS.predict_batch_size)
        tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length)
        tf.compat.v1.logging.info("Precision = %s",
                                  "fp16" if FLAGS.amp else "fp32")
        tf.compat.v1.logging.info("Latency Confidence Level 50 (ms) = %0.2f",
                                  cf_50 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 90 (ms) = %0.2f",
                                  cf_90 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 95 (ms) = %0.2f",
                                  cf_95 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 99 (ms) = %0.2f",
                                  cf_99 * 1000)
        tf.compat.v1.logging.info("Latency Confidence Level 100 (ms) = %0.2f",
                                  cf_100 * 1000)
        tf.compat.v1.logging.info("Latency Average (ms) = %0.2f", avg * 1000)
        tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f",
                                  ss_sentences_per_second)
        dllogging.logger.log(step=(),
                             data={"throughput_val": ss_sentences_per_second},
                             verbosity=Verbosity.DEFAULT)
        tf.compat.v1.logging.info("-----------------------------")
Beispiel #27
0
def main(_):
    """Enable resource variables, set log verbosity, and start `train_eval`.

    The single positional argument is ignored. Training reads its root
    directory and iteration count from `FLAGS.root_dir` and
    `FLAGS.num_iterations`.
    """
    # One call through the `tf.compat.v1` alias is sufficient; repeating it
    # via the bare `tf.enable_resource_variables` spelling is redundant and
    # that spelling only exists in the TF 1.x top-level namespace.
    tf.compat.v1.enable_resource_variables()
    logging.set_verbosity(logging.INFO)
    train_eval(FLAGS.root_dir, num_iterations=FLAGS.num_iterations)
Beispiel #28
0
import time
import os
import logging
import numpy as np
import tensorflow as tf
import gym
import argparse
import shutil

from replay_buffer.segment_tree import ReplayBuffer
from simulator.env_ops import MultiThreadEnv
from teflon.policy.SAC import SAC as SAC
from teflon.multi_step import MultistepAggregator

# Module-level call: resource variables are switched on as soon as this module
# is imported, i.e. before any function defined below can be invoked.
tf.enable_resource_variables()


def explorer(env,
             policy,
             start_transitions=30000,
             explorer_noise=0.1,
             initial_random=True):
    """
    explorer makes transitions from environment (called Actor in Ape-X paper).

    :param MultiThreadEnv env:
    :param SAC policy:
    :param int start_transitions: transition numbers collected from random actions.
    :param bool initial_random: whether it uses random actions at first or not
                                During finetuning this is set to False.
Beispiel #29
0
def main(_):
    """Train and/or evaluate a BERT estimator as selected by command-line flags.

    The positional argument is ignored. The function:

    1. validates flag combinations and optionally initialises Horovod,
    2. collects the input files and builds the session / run configuration,
    3. builds the model function and the `tf.estimator.Estimator`,
    4. runs training when `FLAGS.do_train` is set and logs throughput,
    5. runs evaluation when `FLAGS.do_eval` is set (rank 0 only under
       Horovod), logs throughput and writes the metrics to
       `<output_dir>/eval_results.txt`.

    Raises:
        ValueError: if neither `do_train` nor `do_eval` is set, if Horovod is
            enabled and there are fewer input files than workers, or if both
            `use_fp16` and `manual_fp16` are set.
    """
    # Lazy XLA compilation causes memory fragmentation for BERT, leading to
    # OOM, so it is disabled before TensorFlow does any work.
    os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_lazy_compilation=false"

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
    dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if FLAGS.horovod:
        # Imported lazily so Horovod is only required when it is requested.
        import horovod.tensorflow as hvd
        hvd.init()

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    tf.io.gfile.makedirs(FLAGS.output_dir)

    # `input_files_dir` is a comma-separated list of directories; every file
    # inside each directory is used as training input.
    input_files = []
    for input_file_dir in FLAGS.input_files_dir.split(","):
        input_files.extend(tf.io.gfile.glob(os.path.join(input_file_dir, "*")))

    if FLAGS.horovod and len(input_files) < hvd.size():
        raise ValueError("Input Files must be sharded")
    if FLAGS.use_fp16 and FLAGS.manual_fp16:
        raise ValueError(
            "AMP and Manual Mixed Precision Training are both activated! Error"
        )

    config = tf.compat.v1.ConfigProto()
    if FLAGS.horovod:
        # Pin each process to the GPU matching its local rank.
        config.gpu_options.visible_device_list = str(hvd.local_rank())
        if hvd.rank() == 0:
            tf.compat.v1.logging.info("***** Configuaration *****")
            for key in FLAGS.__flags.keys():
                tf.compat.v1.logging.info('  {}: {}'.format(
                    key, getattr(FLAGS, key)))
            tf.compat.v1.logging.info("**************************")

    #    config.gpu_options.per_process_gpu_memory_fraction = 0.7
    if FLAGS.use_xla:
        config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
        config.graph_options.rewrite_options.memory_optimization = rewriter_config_pb2.RewriterConfig.NO_MEM_OPT
        tf.enable_resource_variables()

    # Under Horovod only rank 0 saves checkpoints and summaries.
    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.output_dir,
        session_config=config,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps
        if not FLAGS.horovod or hvd.rank() == 0 else None,
        save_summary_steps=FLAGS.save_checkpoints_steps
        if not FLAGS.horovod or hvd.rank() == 0 else None,
        # This variable controls how often estimator reports examples/sec.
        # Default value is every 100 steps.
        # When --report_loss is True, we set to very large value to prevent
        # default info reporting from estimator.
        # Ideally we should set it to None, but that does not work.
        log_step_count_steps=10000 if FLAGS.report_loss else 100)

    # The learning rate is scaled by the number of workers under Horovod.
    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate
                                if not FLAGS.horovod else FLAGS.learning_rate *
                                hvd.size(),
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_one_hot_embeddings=False,
                                hvd=None if not FLAGS.horovod else hvd)

    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

    if FLAGS.do_train:

        training_hooks = []
        if FLAGS.horovod and hvd.size() > 1:
            training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
        if (not FLAGS.horovod or hvd.rank() == 0):
            global_batch_size = (FLAGS.train_batch_size *
                                 FLAGS.num_accumulation_steps)
            if FLAGS.horovod:
                global_batch_size *= hvd.size()
            # Appended last on purpose: the statistics below are read back
            # from `training_hooks[-1]`.
            training_hooks.append(
                _LogSessionRunHook(global_batch_size,
                                   FLAGS.num_accumulation_steps, dllogging,
                                   FLAGS.display_loss_steps,
                                   FLAGS.save_checkpoints_steps,
                                   FLAGS.report_loss))

        tf.compat.v1.logging.info("***** Running training *****")
        tf.compat.v1.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        train_input_fn = input_fn_builder(
            input_files=input_files,
            batch_size=FLAGS.train_batch_size,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True,
            hvd=None if not FLAGS.horovod else hvd)

        train_start_time = time.time()
        estimator.train(input_fn=train_input_fn,
                        hooks=training_hooks,
                        max_steps=FLAGS.num_train_steps)
        train_time_elapsed = time.time() - train_start_time

        if (not FLAGS.horovod or hvd.rank() == 0):
            train_time_wo_overhead = training_hooks[-1].total_time
            avg_sentences_per_second = FLAGS.num_train_steps * global_batch_size * 1.0 / train_time_elapsed
            ss_sentences_per_second = (
                FLAGS.num_train_steps - training_hooks[-1].skipped
            ) * global_batch_size * 1.0 / train_time_wo_overhead

            tf.compat.v1.logging.info("-----------------------------")
            tf.compat.v1.logging.info(
                "Total Training Time = %0.2f for Sentences = %d",
                train_time_elapsed, FLAGS.num_train_steps * global_batch_size)
            tf.compat.v1.logging.info(
                "Total Training Time W/O Overhead = %0.2f for Sentences = %d",
                train_time_wo_overhead,
                (FLAGS.num_train_steps - training_hooks[-1].skipped) *
                global_batch_size)
            tf.compat.v1.logging.info(
                "Throughput Average (sentences/sec) with overhead = %0.2f",
                avg_sentences_per_second)
            tf.compat.v1.logging.info(
                "Throughput Average (sentences/sec) = %0.2f",
                ss_sentences_per_second)
            dllogging.logger.log(
                step=(),
                data={"throughput_train": ss_sentences_per_second},
                verbosity=Verbosity.DEFAULT)
            tf.compat.v1.logging.info("-----------------------------")

    if FLAGS.do_eval and (not FLAGS.horovod or hvd.rank() == 0):
        tf.compat.v1.logging.info("***** Running evaluation *****")
        tf.compat.v1.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # Same comma-separated directory convention as the training inputs.
        eval_files = []
        for eval_file_dir in FLAGS.eval_files_dir.split(","):
            eval_files.extend(
                tf.io.gfile.glob(os.path.join(eval_file_dir, "*")))

        eval_input_fn = input_fn_builder(
            input_files=eval_files,
            batch_size=FLAGS.eval_batch_size,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False,
            hvd=None if not FLAGS.horovod else hvd)

        eval_hooks = [LogEvalRunHook(FLAGS.eval_batch_size)]
        eval_start_time = time.time()
        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=FLAGS.max_eval_steps,
                                    hooks=eval_hooks)

        eval_time_elapsed = time.time() - eval_start_time
        time_list = eval_hooks[-1].time_list
        time_list.sort()
        # Removing outliers (init/warmup) in throughput computation: after the
        # ascending sort only the fastest 99% of the timings are kept.
        num_timed_batches = int(len(time_list) * 0.99)
        eval_time_wo_overhead = sum(time_list[:num_timed_batches])
        num_sentences = num_timed_batches * FLAGS.eval_batch_size

        # With fewer than two recorded timings the 99% slice is empty and the
        # summed time is 0; report zero throughput instead of dividing by zero
        # so the evaluation results below are still written.
        if eval_time_wo_overhead > 0:
            ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead
        else:
            ss_sentences_per_second = 0.0

        tf.compat.v1.logging.info("-----------------------------")
        tf.compat.v1.logging.info(
            "Total Inference Time = %0.2f for Sentences = %d",
            eval_time_elapsed, eval_hooks[-1].count * FLAGS.eval_batch_size)
        tf.compat.v1.logging.info(
            "Total Inference Time W/O Overhead = %0.2f for Sentences = %d",
            eval_time_wo_overhead, num_sentences)
        tf.compat.v1.logging.info("Summary Inference Statistics on EVAL set")
        tf.compat.v1.logging.info("Batch size = %d", FLAGS.eval_batch_size)
        tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length)
        tf.compat.v1.logging.info("Precision = %s",
                                  "fp16" if FLAGS.use_fp16 else "fp32")
        tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f",
                                  ss_sentences_per_second)
        dllogging.logger.log(step=(),
                             data={"throughput_val": ss_sentences_per_second},
                             verbosity=Verbosity.DEFAULT)
        tf.compat.v1.logging.info("-----------------------------")

        # Persist the evaluation metrics, one "key = value" line per metric.
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf.compat.v1.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.compat.v1.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
def main(_):
    """Resolve data paths, build the run config and launch `FLAGS.mode`.

    The positional argument is ignored. All file locations are resolved
    relative to `FLAGS.buckets`; `FLAGS.train_file` and `FLAGS.dev_file` are
    comma-separated lists. The training API is selected from `FLAGS.mode`
    ("single_task", "multi_task", "distillation" or "electra") and its
    `monitored_estimator` entry point is called with the flag values.

    Raises:
        ValueError: if `FLAGS.mode` is not one of the supported modes.
    """
    print(FLAGS)
    print(tf.__version__, "==tensorflow version==")

    # To avoid collective reduce hangs.
    os.environ['NCCL_LL_THRESHOLD'] = '0'
    # os.environ['TF_ENABLE_WHILE_V2'] = '1'
    # os.environ['TF_ENABLE_COND_V2'] = '1'

    tf.enable_resource_variables()

    init_checkpoint = os.path.join(FLAGS.buckets, FLAGS.init_checkpoint)
    train_file = [
        os.path.join(FLAGS.buckets, file_name)
        for file_name in FLAGS.train_file.split(",")
    ]
    dev_file = [
        os.path.join(FLAGS.buckets, file_name)
        for file_name in FLAGS.dev_file.split(",")
    ]
    checkpoint_dir = os.path.join(FLAGS.buckets, FLAGS.model_output)

    print(init_checkpoint, train_file, dev_file, checkpoint_dir)

    # Every value of `FLAGS.distribution_strategy` is handled the same way
    # here: a MirroredStrategy with NCCL all-reduce over `FLAGS.num_gpus`.
    cross_tower_ops = cross_tower_ops_lib.AllReduceCrossTowerOps(
        "nccl", 10, 0, 0)
    distribution = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.num_gpus, cross_tower_ops=cross_tower_ops)
    worker_count = FLAGS.num_gpus

    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=True)

    run_config = tf.estimator.RunConfig(
        keep_checkpoint_max=10,
        # model_dir=checkpoint_dir,
        train_distribute=distribution,  # tf 1.8
        # distribute=distribution,     # tf 1.4
        session_config=sess_config,
        save_checkpoints_secs=None,
        save_checkpoints_steps=None,
        log_step_count_steps=100)

    task_index = run_config.task_id
    is_chief = run_config.is_chief

    print("==worker_count==", worker_count, "==local_rank==", task_index,
          "==is is_chief==", is_chief)
    cluster = ""
    target = ""

    print(FLAGS)

    if FLAGS.mode == "single_task":
        train_eval_api = train_eval
    elif FLAGS.mode == "multi_task":
        train_eval_api = multitask_train_eval
    elif FLAGS.mode == 'distillation':
        train_eval_api = distillation_train_eval
    elif FLAGS.mode == "electra":
        train_eval_api = pretrain_train_eval
    else:
        # Fail with a clear message instead of an unbound-local error when
        # `train_eval_api` is used below.
        raise ValueError(
            "Unsupported mode %r; expected one of 'single_task', "
            "'multi_task', 'distillation' or 'electra'." % (FLAGS.mode,))

    # Keyword arguments shared by every mode.
    estimator_kwargs = dict(
        FLAGS=FLAGS,
        worker_count=worker_count,
        task_index=task_index,
        cluster=cluster,
        is_chief=is_chief,
        init_checkpoint=init_checkpoint,
        train_file=train_file,
        dev_file=dev_file,
        checkpoint_dir=checkpoint_dir,
        run_config=run_config,
        distribution_strategy=FLAGS.distribution_strategy,
        profiler=FLAGS.profiler,
        parse_type=FLAGS.parse_type,
        rule_model=FLAGS.rule_model,
        train_op=FLAGS.train_op,
        running_type=FLAGS.running_type,
        decay=FLAGS.decay,
        warmup=FLAGS.warmup,
        input_target=FLAGS.input_target,
        distillation=FLAGS.distillation,
        temperature=FLAGS.temperature,
        distillation_ratio=FLAGS.distillation_ratio,
        attention_type=FLAGS.attention_type,
        ues_token_type=FLAGS.ues_token_type,
        seq_type=FLAGS.seq_type,
        mask_type=FLAGS.mask_type)

    if FLAGS.mode == "electra":
        # The electra flags are only read in this mode, and `target` is not
        # passed to the electra entry point.
        estimator_kwargs.update(
            electra_mode=FLAGS.electra_mode,
            sharing_mode=FLAGS.sharing_mode,
            gumbel_anneal=FLAGS.gumbel_anneal,
            annealed_mask_prob=FLAGS.annealed_mask_prob,
            joint_train=FLAGS.joint_train,
            optimization_type=FLAGS.optimization_type,
            gen_disc_type=FLAGS.gen_disc_type,
            train_op_type=FLAGS.train_op_type,
            mask_method=FLAGS.mask_method,
            minmax_mode=FLAGS.minmax_mode)
        # use_tpu=FLAGS.use_tpu)
    else:
        estimator_kwargs.update(target=target)

    train_eval_api.monitored_estimator(**estimator_kwargs)
Beispiel #31
0
def main(_):
  """Build the training quantities and run the training loop with logging.

  The positional argument is ignored. Each iteration reads the global step,
  picks the mini-batch size (from the schedule or from `FLAGS.batch_size`),
  runs one training step and logs the mini-batch statistics. Every
  `FLAGS.eval_every` steps the evaluation quantities are computed and logged
  as well. With the 'kfac' optimizer the optimizer's learning rate, momentum,
  damping, rho and qmodel_change are fetched and logged too.
  """
  # Resource variables must be enabled when update_damping_immediately is used.
  if FLAGS.update_damping_immediately:
    tf.enable_resource_variables()

  if not FLAGS.use_sua_approx:
    if FLAGS.use_custom_patches_op:
      fisher_constants = dict(use_patches_second_moment_op=True)
    else:
      # Temporary measure to save memory with giant batches.
      fisher_constants = dict(sub_sample_inputs=True,
                              inputs_to_extract_patches_factor=0.2)
    kfac.fisher_factors.set_global_constants(**fisher_constants)

  tf.set_random_seed(FLAGS.seed)
  (train_op, opt, batch_loss, batch_error, batch_size_schedule, batch_size,
   eval_loss, eval_error,
   eval_loss_avg, eval_error_avg) = construct_train_quants()

  global_step = tf.train.get_or_create_global_step()

  using_kfac = FLAGS.optimizer == 'kfac'

  # Everything fetched by the per-iteration training call; the K-FAC
  # quantities are appended after the first three entries when applicable.
  step_fetches = [train_op, batch_loss, batch_error]
  if using_kfac:
    # The control dependency on train_op guarantees that the fetched values
    # are the up-to-date ones. Without it there is a race condition and the
    # old values might be returned nondeterministically. Fetching them in a
    # separate sess.run call would also work, but that can cause problems
    # with training frameworks that use hooks (see the comment further down).
    with tf.control_dependencies([train_op]):
      step_fetches += [opt.learning_rate, opt.momentum, opt.damping, opt.rho,
                       opt.qmodel_change]

  # allow_soft_placement=True is needed because the optimizer tries to place
  # certain ops like "mod" on the GPU, which isn't supported.
  session_config = tf.ConfigProto(allow_soft_placement=True)

  # Train model.

  # Ideally everything would go into a single sess.run call: some "training
  # frameworks" run hooks at each sess.run call and implicitly expect exactly
  # one call per optimizer "iteration". A framework printing the loss at each
  # call would advance the mini-batch and break the "cached batch" mechanism
  # that the damping adaptation method may rely on (and would reevaluate the
  # loss twice). That is not fully done here because it's inconvenient.
  with tf.train.MonitoredTrainingSession(save_checkpoint_secs=30,
                                         config=session_config) as sess:
    for _ in range(FLAGS.train_steps):
      step = sess.run(global_step)

      if FLAGS.use_batch_size_schedule:
        # Past the end of the schedule the last entry keeps being used.
        schedule_index = min(step, len(batch_size_schedule) - 1)
        current_batch_size = batch_size_schedule[schedule_index]
      else:
        current_batch_size = FLAGS.batch_size

      step_values = sess.run(step_fetches,
                             feed_dict={batch_size: current_batch_size})
      loss_value, error_value = step_values[1], step_values[2]

      # Print training stats.
      tf.logging.info('iteration: %d', step)
      tf.logging.info(
          'mini-batch size: %d | mini-batch loss = %f | mini-batch error = %f ',
          current_batch_size, loss_value, error_value)

      if using_kfac:
        (lr_value, momentum_value, damping_value, rho_value,
         qmodel_change_value) = step_values[3:]
        tf.logging.info('learning_rate = %f | momentum = %f',
                        lr_value, momentum_value)
        tf.logging.info('damping = %f | rho = %f | qmodel_change = %f',
                        damping_value, rho_value, qmodel_change_value)

      if (step + 1) % FLAGS.eval_every != 0:
        tf.logging.info('----')
        continue

      # "Eval" here means just compute stuff on the full training set.
      eval_values = sess.run(
          [eval_loss, eval_error, eval_loss_avg, eval_error_avg])
      tf.logging.info('-----------------------------------------------------')
      tf.logging.info('eval_loss = %f | eval_error = %f',
                      eval_values[0], eval_values[1])
      tf.logging.info('eval_loss_avg = %f | eval_error_avg = %f',
                      eval_values[2], eval_values[3])
      tf.logging.info('-----------------------------------------------------')
Beispiel #32
0
def main(argv):
  """Train and/or evaluate the pruned ResNet TPUEstimator per `FLAGS.mode`.

  Supported modes:
    * 'eval_once': evaluate the checkpoint `FLAGS.eval_once_ckpt_prefix`.
    * 'eval': evaluate every new checkpoint appearing in the output
      directory until one at or past `FLAGS.train_steps` has been evaluated.
    * 'train': train up to `FLAGS.train_steps` with the model-export hook.
    * 'train_and_eval': alternate training for `FLAGS.steps_per_eval` steps
      with an evaluation, until `FLAGS.train_steps` is reached.

  Args:
    argv: unused.

  Raises:
    ValueError: if `FLAGS.eval_batch_size` does not evenly divide
      `FLAGS.num_eval_images`.
    AssertionError: if `FLAGS.mode` is none of the modes listed above.
  """
  del argv  # Unused.

  tf.enable_resource_variables()
  tf.set_random_seed(FLAGS.seed)
  set_lr_schedule()
  set_custom_sparsity_map()
  # Sub-directory name that encodes the main hyper-parameters of the run.
  folder_stub = os.path.join(FLAGS.training_method, str(FLAGS.end_sparsity),
                             str(FLAGS.maskupdate_begin_step),
                             str(FLAGS.maskupdate_end_step),
                             str(FLAGS.maskupdate_frequency),
                             str(FLAGS.drop_fraction),
                             str(FLAGS.label_smoothing),
                             str(FLAGS.weight_decay))

  output_dir = FLAGS.output_dir
  if FLAGS.use_folder_stub:
    output_dir = os.path.join(output_dir, folder_stub)

  export_dir = os.path.join(output_dir, 'export_dir')

  # we pass the updated eval and train string to the params dictionary.
  params = {}
  params['output_dir'] = output_dir
  params['training_method'] = FLAGS.training_method
  params['use_tpu'] = FLAGS.use_tpu

  dataset_func = functools.partial(
      imagenet_input.ImageNetInput, data_dir=FLAGS.data_directory,
      transpose_input=False, num_parallel_calls=FLAGS.num_parallel_calls,
      use_bfloat16=False)
  imagenet_train, imagenet_eval = [dataset_func(is_training=is_training)
                                   for is_training in [True, False]]

  run_config = tpu_config.RunConfig(
      master=FLAGS.master,
      model_dir=output_dir,
      save_checkpoints_steps=FLAGS.steps_per_checkpoint,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_cores,
          tpu_job_name=FLAGS.tpu_job_name))

  classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)

  # Second estimator with export_to_tpu=False; it is only handed to the
  # export hook below.
  cpu_classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      export_to_tpu=False,
      eval_batch_size=FLAGS.eval_batch_size)

  if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
    raise ValueError(
        'eval_batch_size (%d) must evenly divide num_eval_images(%d)!' %
        (FLAGS.eval_batch_size, FLAGS.num_eval_images))

  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size
  if FLAGS.mode == 'eval_once':
    ckpt_path = os.path.join(output_dir, FLAGS.eval_once_ckpt_prefix)
    dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
    classifier.evaluate(
        input_fn=dataset.input_fn,
        steps=eval_steps,
        checkpoint_path=ckpt_path,
        name='{0}'.format(FLAGS.eval_once_ckpt_prefix))
  elif FLAGS.mode == 'eval':
    # Run evaluation when there's a new checkpoint
    for ckpt in evaluation.checkpoints_iterator(output_dir):
      tf.logging.info('Starting to evaluate.')
      try:
        dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
        classifier.evaluate(
            input_fn=dataset.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name='eval')
        # Terminate eval job when final checkpoint is reached. The step is
        # parsed from the checkpoint file name, taken after the first '-'.
        global_step = int(os.path.basename(ckpt).split('-')[1])
        if global_step >= FLAGS.train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % global_step)
          break

      except tf.errors.NotFoundError:
        # Logged through tf.logging.info like the rest of this function,
        # rather than by calling `logging` itself as a function.
        tf.logging.info('Checkpoint no longer exists,skipping checkpoint.')

  else:
    global_step = estimator._load_global_step_from_checkpoint_dir(output_dir)
    # Session run hooks to export model for prediction
    export_hook = ExportModelHook(cpu_classifier, export_dir)
    hooks = [export_hook]

    if FLAGS.mode == 'train':
      tf.logging.info('start training...')
      classifier.train(
          input_fn=imagenet_train.input_fn,
          hooks=hooks,
          max_steps=FLAGS.train_steps)
    else:
      assert FLAGS.mode == 'train_and_eval'
      tf.logging.info('start training and eval...')
      while global_step < FLAGS.train_steps:
        # Train in chunks of steps_per_eval, never past train_steps.
        next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        global_step = next_checkpoint
        # Uses a format argument; the step used to be passed print-style to
        # a direct `logging(...)` call.
        tf.logging.info('Completed training up to step : %d', global_step)
        classifier.evaluate(input_fn=imagenet_eval.input_fn, steps=eval_steps)