def main(argv):
    global counter

    print(argv, '\n', '=' * 20)
    '''
    if args.query:
        t2t_query.main(argv)
        exit()
    '''
    if train_not_test:
        # Train incrementally: raise the train_steps target by args.increment on
        # each pass so t2t_trainer resumes from the latest checkpoint and
        # continues training.
        while counter < limit or args.no_limit:

            tf.flags.FLAGS.set_default('train_steps', counter + args.increment)
            tf.flags.FLAGS.train_steps = counter + args.increment
            print('flag:', tf.flags.FLAGS.get_flag_value('train_steps', 5),
                  str(counter + args.increment))

            t2t_trainer.main(argv)

            counter += args.increment
            print('=' * 50, counter, limit, '=' * 50)

    else:
        t2t_decoder.main(argv)
    pass
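The snippet above relies on module-level globals (counter, limit, train_not_test) and an argparse namespace args defined elsewhere in the script. A minimal sketch of how they might be wired up before handing control to TensorFlow's flag machinery; the option names and defaults here are illustrative assumptions, not taken from the original script:

import argparse
import sys

import tensorflow as tf
from tensor2tensor.bin import t2t_trainer  # registers the t2t flags
from tensor2tensor.bin import t2t_decoder

parser = argparse.ArgumentParser()
parser.add_argument('--increment', type=int, default=1000)
parser.add_argument('--no-limit', dest='no_limit', action='store_true')
parser.add_argument('--query', action='store_true')
args, remaining = parser.parse_known_args()

counter = 0             # steps trained so far
limit = 100000          # stop once counter reaches this, unless --no-limit
train_not_test = True   # True: run t2t_trainer; False: run t2t_decoder

if __name__ == '__main__':
    # Forward the unconsumed arguments to the TF flag parser, then call main().
    tf.app.run(main=main, argv=[sys.argv[0]] + remaining)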
Example 2
def train_world_model(problem_name,
                      data_dir,
                      output_dir,
                      hparams,
                      epoch,
                      use_autoencoder=False):
    """Train the world model on problem_name."""
    train_steps = hparams.model_train_steps * (epoch + 2)
    with temporary_flags({
            "data_dir": data_dir,
            "output_dir": output_dir,
            "problem": problem_name,
            "model": hparams.generative_model,
            "hparams_set": hparams.generative_model_params,
            "eval_steps": 100,
            "train_steps": train_steps,
            # Hack: If training on autoencoded frames, autoencoder_path needs
            # to be set so that the problem reports the right sizes for frames.
            "autoencoder_path": "dummy" if use_autoencoder else None,
    }):
        t2t_trainer.main([])
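temporary_flags is a small helper used throughout these RL training loops: it overrides tf.flags.FLAGS values for the duration of the with block and restores the previous values on exit, so the nested t2t_trainer.main([]) call sees the right configuration. A minimal sketch of such a context manager, offered as an illustration of the idea rather than the library's exact implementation:

import contextlib

import tensorflow as tf


@contextlib.contextmanager
def temporary_flags(flag_settings):
  """Temporarily set tf.flags.FLAGS values, restoring the old ones afterwards."""
  old_values = {name: getattr(tf.flags.FLAGS, name) for name in flag_settings}
  try:
    for name, value in flag_settings.items():
      setattr(tf.flags.FLAGS, name, value)
    yield
  finally:
    for name, value in old_values.items():
      setattr(tf.flags.FLAGS, name, value)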
Example 3
def train_world_model(problem_name,
                      data_dir,
                      output_dir,
                      hparams,
                      epoch,
                      use_autoencoder=False):
    """Train the world model on problem_name."""
    train_steps = hparams.model_train_steps * (epoch + 2)
    with temporary_flags({
            "data_dir": data_dir,
            "output_dir": output_dir,
            "problem": problem_name,
            "model": hparams.generative_model,
            "hparams_set": hparams.generative_model_params,
            "eval_steps": 100,
            "train_steps": train_steps,
            "autoencoder_path": "dummy" if use_autoencoder else None,
    }):
        t2t_trainer.main([])
Example 4
def train(hparams, output_dir):
  """Training function."""
  prefix = output_dir
  data_dir = os.path.expanduser(prefix + "/data")
  tmp_dir = os.path.expanduser(prefix + "/tmp")
  output_dir = os.path.expanduser(prefix + "/output")
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)
  tf.gfile.MakeDirs(output_dir)
  last_model = ""
  start_time = time.time()
  line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>    "
  for iloop in range(hparams.epochs):
      time_delta = time.time() - start_time
      print(line+"Step {}.1. - generate data from policy. "
            "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
      FLAGS.problems = "gym_discrete_problem"
      FLAGS.agent_policy_path = last_model
      gym_problem = problems.problem(FLAGS.problems)
      gym_problem.num_steps = hparams.true_env_generator_num_steps
      iter_data_dir = os.path.join(data_dir, str(iloop))
      tf.gfile.MakeDirs(iter_data_dir)
      gym_problem.generate_data(iter_data_dir, tmp_dir)

      time_delta = time.time() - start_time
      print(line+"Step {}.2. - generate env model. "
            "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
      # 2. generate env model
      FLAGS.data_dir = iter_data_dir
      FLAGS.output_dir = output_dir
      FLAGS.model = hparams.generative_model
      FLAGS.hparams_set = hparams.generative_model_params
      FLAGS.train_steps = hparams.model_train_steps
      FLAGS.eval_steps = 1
      t2t_trainer.main([])

      time_delta = time.time() - start_time
      print(line+"Step {}.3. - evalue env model. "
            "Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
      gym_simulated_problem = problems.problem("gym_simulated_discrete_problem")
      gym_simulated_problem.num_steps = hparams.simulated_env_generator_num_steps
      gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)

      time_delta = time.time() - start_time
      print(line+"Step {}.4. - train PPO in model env."
            " Time: {}".format(iloop, str(datetime.timedelta(seconds=time_delta))))
      ppo_epochs_num = hparams.ppo_epochs_num
      ppo_hparams = trainer_lib.create_hparams("atari_base", "epochs_num={},simulated_environment=True,eval_every_epochs=0,save_models_every_epochs={}".format(ppo_epochs_num+1, ppo_epochs_num),
                                           data_dir=output_dir)
      ppo_hparams.epoch_length = hparams.ppo_epoch_length
      ppo_dir = tempfile.mkdtemp(dir=data_dir, prefix="ppo_")
      in_graph_wrappers = [
          (TimeLimitWrapper, {"timelimit": 150}),
          (PongT2TGeneratorHackWrapper, {"add_value": -2})]
      in_graph_wrappers += gym_problem.in_graph_wrappers
      ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)
      rl_trainer_lib.train(ppo_hparams, "PongNoFrameskip-v4", ppo_dir)

      last_model = ppo_dir + "/model{}.ckpt".format(ppo_epochs_num)
Example 5
  def testTrain(self):
    FLAGS.problem = "tiny_algo"
    FLAGS.model = "transformer"
    FLAGS.hparams_set = "transformer_tiny"
    FLAGS.train_steps = 1
    FLAGS.eval_steps = 1
    FLAGS.output_dir = tf.test.get_temp_dir()
    FLAGS.data_dir = tf.test.get_temp_dir()
    t2t_trainer.main(None)
Example 6
def testTrain(self):
    FLAGS.problem = "tiny_algo"
    FLAGS.model = "transformer"
    FLAGS.hparams_set = "transformer_tiny"
    FLAGS.train_steps = 1
    FLAGS.eval_steps = 1
    FLAGS.output_dir = tf.test.get_temp_dir()
    FLAGS.data_dir = tf.test.get_temp_dir()
    t2t_trainer.main(None)
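Both versions of this test assume a module-level FLAGS = tf.flags.FLAGS and that importing t2t_trainer has already registered the flags being set (problem, model, hparams_set, train_steps, eval_steps, data_dir, output_dir). A minimal sketch of the preamble such a test file needs:

import tensorflow as tf

from tensor2tensor.bin import t2t_trainer  # defines the flags assigned below

FLAGS = tf.flags.FLAGS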
Example 7
def train(generate_data=True):
    FLAGS.problem = "fix_grammar_mistakes"
    FLAGS.model = "transformer"
    FLAGS.generate_data = True
    FLAGS.hparams_set = "transformer_base_small_gpu"
    FLAGS.t2t_usr_dir = "src"
    FLAGS.output_dir = "t2t_output"
    FLAGS.data_dir = "t2t_data"
    t2t_trainer.main(None)
Example 8
def train(generate_data=True):
    FLAGS.problem = "english_grammar_error"
    FLAGS.model = "transformer"
    FLAGS.generate_data = True
    FLAGS.hparams_set = "transformer_big_single_gpu"
    FLAGS.t2t_usr_dir = "src"
    FLAGS.output_dir = "finetune"
    FLAGS.data_dir = "t2t_finetune"
    FLAGS.train_steps = 350000
    t2t_trainer.main(None)
Example 9
    def test_e2e_export_and_query(self):
        """Test that we can export and query the model via tf.serving."""

        FLAGS.t2t_usr_dir = _get_t2t_usr_dir()
        FLAGS.problem = "github_function_docstring"
        FLAGS.data_dir = "/mnt/nfs-east1-d/data"
        FLAGS.tmp_dir = "/mnt/nfs-east1-d/tmp"
        FLAGS.output_dir = tempfile.mkdtemp()
        #FLAGS.export_dir = os.path.join(FLAGS.output_dir, "export")
        FLAGS.model = "similarity_transformer_dev"
        FLAGS.hparams_set = "similarity_transformer_tiny"
        FLAGS.train_steps = 1
        FLAGS.schedule = "train"

        timeout_secs = 10

        usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

        t2t_trainer.main(None)

        export.main(None)

        # ----
        # Start model server

        # Will start a tf model server on an un-used port and
        # kill process on exit.
        _, server, _ = TensorflowModelServer().RunServer(
            FLAGS.model, FLAGS.output_dir)

        # ----
        # Query the server

        # NOTE: the code below this early return is never executed; it sketches
        # how the served model would be queried but leaves `problem_object`
        # undefined.
        return

        doc_query = [1, 2, 3]  # Dummy encoded doc query
        code_query = [1, 2, 3]  # Dummy encoded code query

        # Alternatively for query, without going through query.main()
        # TODO: Is servable_name the same as model name?
        request_fn = serving_utils.make_grpc_request_fn(
            servable_name=FLAGS.model,
            server=server,
            timeout_secs=timeout_secs)

        # Compute embeddings
        # TODO: May need to customize how these queries are fed in, potentially
        #       side-stepping serving_utils.predict.
        encoded_string = serving_utils.predict([doc_query], problem_object,
                                               request_fn)
        encoded_code = serving_utils.predict([code_query], problem_object,
                                             request_fn)
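The unexecuted query code above references a problem_object that the test never defines. If the query path were enabled, the problem would presumably be looked up from the registry, e.g. (a sketch, assuming the usr_dir problems have already been imported):

from tensor2tensor.utils import registry

problem_object = registry.problem(FLAGS.problem)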
Example 10
def train_autoencoder(problem_name, data_dir, output_dir, hparams, epoch):
  """Train autoencoder on problem_name."""
  train_steps = hparams.autoencoder_train_steps * (epoch + 2)
  with temporary_flags({
      "problem": problem_name,
      "data_dir": data_dir,
      "output_dir": output_dir,
      "model": "autoencoder_ordered_discrete",
      "hparams_set": "autoencoder_discrete_pong",
      "train_steps": train_steps,
      "eval_steps": 100,
  }):
    t2t_trainer.main([])
Example 11
def train_autoencoder(problem_name, data_dir, output_dir, hparams, epoch):
    """Train autoencoder on problem_name."""
    train_steps = hparams.autoencoder_train_steps * (epoch + 2)
    with temporary_flags({
            "problem": problem_name,
            "data_dir": data_dir,
            "output_dir": output_dir,
            "model": "autoencoder_ordered_discrete",
            "hparams_set": "autoencoder_discrete_pong",
            "train_steps": train_steps,
            "eval_steps": 100,
    }):
        t2t_trainer.main([])
Example 12
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch):
    """Train the world model on problem_name."""
    train_steps = hparams.model_train_steps * (epoch + 2)
    with temporary_flags({
            "data_dir": data_dir,
            "output_dir": output_dir,
            "problem": problem_name,
            "model": hparams.generative_model,
            "hparams_set": hparams.generative_model_params,
            "eval_steps": 100,
            "train_steps": train_steps,
    }):
        t2t_trainer.main([])
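Because t2t_trainer resumes from whatever checkpoint already sits in output_dir, train_steps is a cumulative target rather than a per-call count: multiplying by (epoch + 2) gives the first epoch a double-length warm start and then adds model_train_steps new steps on each subsequent epoch. A quick illustration of the resulting targets, assuming model_train_steps = 10000:

model_train_steps = 10000
for epoch in range(4):
  print(epoch, model_train_steps * (epoch + 2))
# 0 20000
# 1 30000
# 2 40000
# 3 50000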
Example 13
def train_autoencoder(problem_name, data_dir, output_dir, hparams, epoch):
    """Train autoencoder on problem_name."""
    additional_steps = 1 + hparams.autoencoder_train_steps_initial_multiplier
    train_steps = hparams.autoencoder_train_steps * (epoch + additional_steps)
    with temporary_flags({
            "problem": problem_name,
            "data_dir": data_dir,
            "output_dir": output_dir,
            "model": "autoencoder_ordered_discrete",
            "hparams_set": hparams.autoencoder_hparams_set,
            "train_steps": train_steps,
            "eval_steps": 100,
    }):
        t2t_trainer.main([])
Example 14
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch,
                      use_autoencoder=False):
  """Train the world model on problem_name."""
  train_steps = hparams.model_train_steps * (epoch + 2)
  with temporary_flags({
      "data_dir": data_dir,
      "output_dir": output_dir,
      "problem": problem_name,
      "model": hparams.generative_model,
      "hparams_set": hparams.generative_model_params,
      "eval_steps": 100,
      "train_steps": train_steps,
      # Hack: If training on autoencoded frames, autoencoder_path needs to be
      # set so that the problem reports the right sizes for frames.
      "autoencoder_path": "dummy" if use_autoencoder else None,
  }):
    t2t_trainer.main([])
Example 15
  def test_trains(self):
    """Test that we can export and query the model via tf.serving."""

    FLAGS.t2t_usr_dir = _get_t2t_usr_dir()
    FLAGS.problem = "github_function_docstring"
    FLAGS.data_dir = "/mnt/nfs-east1-d/data"
    FLAGS.tmp_dir = "/mnt/nfs-east1-d/tmp"
    FLAGS.output_dir = tempfile.mkdtemp()
    FLAGS.model = "similarity_transformer_dev"
    FLAGS.hparams_set = "similarity_transformer_tiny"
    FLAGS.train_steps = 1000
    FLAGS.schedule = "train"
    FLAGS.hparams = "'loss_variant=slicenet'"

    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    
    t2t_trainer.main(None)
Example 16
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch):
    """Train the world model on problem_name."""
    train_steps = hparams.model_train_steps * (
        epoch + hparams.inital_epoch_train_steps_multiplier)
    model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
    learning_rate = model_hparams.learning_rate_constant
    if epoch > 0: learning_rate *= hparams.learning_rate_bump
    with temporary_flags({
            "data_dir": data_dir,
            "output_dir": output_dir,
            "problem": problem_name,
            "model": hparams.generative_model,
            "hparams_set": hparams.generative_model_params,
            "hparams": "learning_rate_constant=%.6f" % learning_rate,
            "eval_steps": 100,
            "local_eval_frequency": 2000,
            "train_steps": train_steps,
    }):
        t2t_trainer.main([])
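The "hparams" flag here is a comma-separated string of name=value overrides applied on top of hparams_set, which is how the adjusted learning rate reaches the model. A sketch of the equivalent direct call, using the same trainer_lib.create_hparams signature seen in the other examples:

model_hparams = trainer_lib.create_hparams(
    hparams.generative_model_params,
    "learning_rate_constant=%.6f" % learning_rate)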
Example 17
    def test_train_and_export(self):  # pylint: disable=no-self-use
        """Test that we can train and export the model."""

        test_data_dir = os.path.join(os.path.dirname(__file__), "test_data")
        # If we set t2t_usr_dir, t2t_trainer.main will end up importing that
        # directory, which causes an error because the model ends up being
        # registered twice.
        FLAGS.problem = "kf_github_function_docstring"
        FLAGS.data_dir = tempfile.mkdtemp()

        FLAGS.tmp_dir = tempfile.mkdtemp()
        logging.info("Using data_dir %s", FLAGS.data_dir)
        logging.info("Using tmp_dir %s", FLAGS.tmp_dir)

        FLAGS.output_dir = tempfile.mkdtemp()
        logging.info("Using output_dir %s", FLAGS.output_dir)

        FLAGS.model = similarity_transformer.MODEL_NAME
        FLAGS.hparams_set = "transformer_tiny"
        FLAGS.train_steps = 1
        FLAGS.eval_steps = 5

        # We want to trigger eval.
        FLAGS.local_eval_frequency = 1
        FLAGS.schedule = "continuous_train_and_eval"

        problem = registry.problem(FLAGS.problem)

        # Override the data path prefix and number of shards so we use
        # the test data rather than downloading from GCS.
        problem.DATA_PATH_PREFIX = os.path.join(test_data_dir, "raw_data")
        problem.NUM_SHARDS = 1

        # Generating the data can be slow because it uses an iterative process
        # to compute the vocab.
        # During development you can reuse data_dir between runs; if the vocab
        # and processed input files already exist in that directory, they won't
        # need to be regenerated.
        problem.generate_data(FLAGS.data_dir, FLAGS.tmp_dir)

        t2t_trainer.main(None)

        export.main(None)
Example 18
def train_world_model(problem_name, data_dir, output_dir, hparams, epoch):
    """Train the world model on problem_name."""
    train_steps = hparams.model_train_steps * (epoch + 2)
    model_hparams = trainer_lib.create_hparams(hparams.generative_model_params)
    learning_rate = model_hparams.learning_rate_constant
    # Bump the learning rate by 3x after the first epoch.
    # We picked 3x because the default learning rate schedule decreases with
    # 1/sqrt of the time step; 1/sqrt(10k) = 0.01 and 1/sqrt(100k) ~ 0.0032,
    # so bumping it up 3x roughly "goes back" from 100k steps to 10k, which is
    # approximately as much as "going back 1 epoch" would be in the default
    # schedule. In your experiments, you may want to tune this rate for your
    # schedule.
    if epoch > 0: learning_rate *= 3
    with temporary_flags({
            "data_dir": data_dir,
            "output_dir": output_dir,
            "problem": problem_name,
            "model": hparams.generative_model,
            "hparams_set": hparams.generative_model_params,
            "hparams": "learning_rate_constant=%.6f" % learning_rate,
            "eval_steps": 100,
            "train_steps": train_steps,
    }):
        t2t_trainer.main([])
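The 3x figure in the comment can be checked directly: with an inverse-square-root schedule, going from 100k steps back to 10k scales the learning rate by sqrt(100000 / 10000) ≈ 3.16, i.e. roughly 3x.

import math

print(1 / math.sqrt(10000))                   # 0.01
print(1 / math.sqrt(100000))                  # ~0.0032
print(math.sqrt(100000) / math.sqrt(10000))   # ~3.16, the "3x" bump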
Example 19
def main(argv):

  argv = common_flags.update_argv(argv)
  return t2t_trainer.main(argv)
Example 20
def train(hparams, output_dir):
  """Training function."""
  prefix = output_dir
  data_dir = os.path.expanduser(prefix + "/data")
  tmp_dir = os.path.expanduser(prefix + "/tmp")
  output_dir = os.path.expanduser(prefix + "/output")
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)
  tf.gfile.MakeDirs(output_dir)
  last_model = ""
  start_time = time.time()
  line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>    "
  epoch_metrics = []
  for iloop in range(hparams.epochs):
    # Generate random frames.
    if iloop == 0:
      time_delta = time.time() - start_time
      tf.logging.info("%s Step %d.0 - generate random data. Time: %s",
                      line, iloop, str(datetime.timedelta(seconds=time_delta)))
      FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
      FLAGS.agent_policy_path = ""
      gym_problem = registry.problem(FLAGS.problem)
      gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
      iter_data_dir = os.path.join(data_dir, "0random")
      tf.gfile.MakeDirs(iter_data_dir)
      gym_problem.generate_data(iter_data_dir, tmp_dir)
      mean_reward = gym_problem.sum_of_rewards / max(1.0, gym_problem.dones)
      tf.logging.info("%s Step 0.0 random reward: %.4f" % (line, mean_reward))

    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.1 - generate env model. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))

    # Train env model
    FLAGS.data_dir = iter_data_dir
    FLAGS.output_dir = output_dir
    FLAGS.model = hparams.generative_model
    FLAGS.hparams_set = hparams.generative_model_params
    FLAGS.train_steps = hparams.model_train_steps * (iloop + 2)
    FLAGS.eval_steps = 10
    t2t_trainer.main([])

    # Evaluate and dump frames from env model
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.1a - evaluate env model. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    gym_simulated_problem = registry.problem(
        "gym_simulated_discrete_problem_with_agent_on_%s" % hparams.game)
    sim_steps = hparams.simulated_env_generator_num_steps
    gym_simulated_problem.settable_num_steps = sim_steps
    gym_simulated_problem.real_env_problem = gym_problem
    gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)
    model_reward_accuracy = 0.0
    if gym_simulated_problem.dones != 0:
      n = float(gym_simulated_problem.dones)
      model_reward_accuracy = (
          gym_simulated_problem.successful_episode_reward_predictions / n)

    # Train PPO agent
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.2 - train PPO in model env. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))

    # Setup PPO hparams
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params,
                                             data_dir=output_dir)
    ppo_epochs_num = hparams.ppo_epochs_num
    ppo_hparams.epochs_num = ppo_epochs_num
    ppo_hparams.simulated_environment = True
    ppo_hparams.eval_every_epochs = 0
    ppo_hparams.save_models_every_epochs = ppo_epochs_num
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_hparams.num_agents = hparams.ppo_num_agents
    ppo_hparams.problem = gym_problem

    in_graph_wrappers = [
        (TimeLimitWrapper, {"timelimit": hparams.ppo_time_limit}),
        (MaxAndSkipWrapper, {"skip": 4})]
    in_graph_wrappers += gym_problem.in_graph_wrappers
    ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)

    ppo_dir = generator_utils.make_tmp_dir(dir=data_dir, prefix="ppo_")
    rl_trainer_lib.train(ppo_hparams, gym_simulated_problem.env_name, ppo_dir)
    last_model = ppo_dir

    # Generate environment frames.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.3 - generate environment data. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
    FLAGS.agent_policy_path = last_model
    gym_problem = registry.problem(FLAGS.problem)
    gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    iter_data_dir = os.path.join(data_dir, str(iloop))
    tf.gfile.MakeDirs(iter_data_dir)
    gym_problem.generate_data(iter_data_dir, tmp_dir)
    mean_reward = 0.0
    if gym_problem.dones != 0:
      mean_reward = gym_problem.sum_of_rewards / float(gym_problem.dones)
    tf.logging.info("%s Step %d mean reward: %.4f" % (line, iloop, mean_reward))

    # Report metrics.
    eval_metrics = {"model_reward_accuracy": model_reward_accuracy,
                    "mean_reward": mean_reward}
    epoch_metrics.append(eval_metrics)

  # Report the evaluation metrics from the final epoch
  return epoch_metrics[-1]
Example 21
from tensor2tensor.utils import registry
from tensor2tensor import problems
from tensor2tensor.bin import t2t_trainer
import sys

# print(problems.available()) #Show all problems

t2t_trainer.main(sys.argv)
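Note that calling t2t_trainer.main(sys.argv) at module level bypasses tf.app.run, so depending on the TensorFlow version the command-line flags may not have been parsed by the time main runs. A common, safer variant of the same short script:

import tensorflow as tf

from tensor2tensor.bin import t2t_trainer

if __name__ == "__main__":
  tf.logging.set_verbosity(tf.logging.INFO)
  # tf.app.run parses --problem, --model, --hparams_set, --data_dir,
  # --output_dir, etc. into FLAGS before invoking t2t_trainer.main.
  tf.app.run(main=t2t_trainer.main)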
Example 22
def main(argv):
    t2t_trainer.main(argv)
Example 23
def train(hparams, output_dir):
    """Training function."""
    prefix = output_dir
    data_dir = os.path.expanduser(prefix + "/data")
    tmp_dir = os.path.expanduser(prefix + "/tmp")
    output_dir = os.path.expanduser(prefix + "/output")
    tf.gfile.MakeDirs(data_dir)
    tf.gfile.MakeDirs(tmp_dir)
    tf.gfile.MakeDirs(output_dir)
    last_model = ""
    start_time = time.time()
    line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>    "
    for iloop in range(hparams.epochs):
        time_delta = time.time() - start_time
        print(line + "Step {}.1. - generate data from policy. "
              "Time: {}".format(iloop,
                                str(datetime.timedelta(seconds=time_delta))))
        FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
        FLAGS.agent_policy_path = last_model
        gym_problem = registry.problem(FLAGS.problem)
        gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
        iter_data_dir = os.path.join(data_dir, str(iloop))
        tf.gfile.MakeDirs(iter_data_dir)
        gym_problem.generate_data(iter_data_dir, tmp_dir)

        time_delta = time.time() - start_time
        print(line + "Step {}.2. - generate env model. "
              "Time: {}".format(iloop,
                                str(datetime.timedelta(seconds=time_delta))))
        # 2. generate env model
        FLAGS.data_dir = iter_data_dir
        FLAGS.output_dir = output_dir
        FLAGS.model = hparams.generative_model
        FLAGS.hparams_set = hparams.generative_model_params
        FLAGS.train_steps = hparams.model_train_steps * (iloop + 2)
        FLAGS.eval_steps = 10
        t2t_trainer.main([])

        # Dump frames from env model.
        time_delta = time.time() - start_time
        print(line + "Step {}.3. - evaluate env model. "
              "Time: {}".format(iloop,
                                str(datetime.timedelta(seconds=time_delta))))
        gym_simulated_problem = registry.problem(
            "gym_simulated_discrete_problem_with_agent_on_%s" % hparams.game)
        sim_steps = hparams.simulated_env_generator_num_steps
        gym_simulated_problem.settable_num_steps = sim_steps
        gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)

        # PPO.
        time_delta = time.time() - start_time
        print(line + "Step {}.4. - train PPO in model env."
              " Time: {}".format(iloop,
                                 str(datetime.timedelta(seconds=time_delta))))
        ppo_epochs_num = hparams.ppo_epochs_num
        ppo_hparams = trainer_lib.create_hparams(
            "atari_base",
            "epochs_num={},simulated_environment=True,eval_every_epochs=0,"
            "save_models_every_epochs={}".format(ppo_epochs_num + 1,
                                                 ppo_epochs_num),
            data_dir=output_dir)
        ppo_hparams.epoch_length = hparams.ppo_epoch_length
        ppo_dir = tempfile.mkdtemp(dir=data_dir, prefix="ppo_")
        in_graph_wrappers = [(TimeLimitWrapper, {
            "timelimit": hparams.ppo_time_limit
        }), (MaxAndSkipWrapper, {
            "skip": 4
        })]
        in_graph_wrappers += gym_problem.in_graph_wrappers
        ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)
        ppo_hparams.num_agents = hparams.ppo_num_agents
        rl_trainer_lib.train(ppo_hparams, gym_simulated_problem.env_name,
                             ppo_dir)

        last_model = ppo_dir + "/model{}.ckpt".format(ppo_epochs_num)
Example 24
def main(argv):
  t2t_trainer.main(argv)
Example 25
def train(hparams, output_dir):
  """Training function."""
  prefix = output_dir
  data_dir = os.path.expanduser(prefix + "/data")
  tmp_dir = os.path.expanduser(prefix + "/tmp")
  output_dir = os.path.expanduser(prefix + "/output")
  autoencoder_dir = os.path.expanduser(prefix + "/autoencoder")
  tf.gfile.MakeDirs(data_dir)
  tf.gfile.MakeDirs(tmp_dir)
  tf.gfile.MakeDirs(output_dir)
  tf.gfile.MakeDirs(autoencoder_dir)
  last_model = ""
  start_time = time.time()
  line = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>    "
  epoch_metrics = []
  iter_data_dirs = []
  ae_data_dirs = []
  orig_autoencoder_path = FLAGS.autoencoder_path
  for iloop in range(hparams.epochs):
    # Train autoencoder if needed.
    if (hparams.autoencoder_train_steps > 0 and iloop == 0 and
        not orig_autoencoder_path):
      time_delta = time.time() - start_time
      tf.logging.info("%s Step AE - train autoencoder. Time: %s",
                      line, str(datetime.timedelta(seconds=time_delta)))
      with tf.Graph().as_default():
        # Generate data.
        FLAGS.autoencoder_path = ""
        FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
        FLAGS.agent_policy_path = ""
        gym_problem = registry.problem(FLAGS.problem)
        gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
        ae_data_dir = os.path.join(data_dir, "ae%d" % iloop)
        ae_data_dirs.append(ae_data_dir)
        tf.gfile.MakeDirs(ae_data_dir)
        gym_problem.generate_data(ae_data_dir, tmp_dir)
        if ae_data_dirs[:-1]:
          combine_world_model_train_data(gym_problem,
                                         ae_data_dir,
                                         ae_data_dirs[:-1])
        # Train AE.
        FLAGS.data_dir = ae_data_dir
        FLAGS.output_dir = autoencoder_dir
        # TODO(lukaszkaiser): make non-hardcoded here and in gym_problems.py.
        FLAGS.model = "autoencoder_ordered_discrete"
        FLAGS.hparams_set = "autoencoder_discrete_pong"
        FLAGS.train_steps = hparams.autoencoder_train_steps * (iloop + 2)
        FLAGS.eval_steps = 100
        t2t_trainer.main([])
        FLAGS.autoencoder_path = autoencoder_dir

    # Generate random frames.
    if iloop == 0:
      time_delta = time.time() - start_time
      tf.logging.info("%s Step %d.0 - generate random data. Time: %s",
                      line, iloop, str(datetime.timedelta(seconds=time_delta)))
      FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
      FLAGS.agent_policy_path = ""
      gym_problem = registry.problem(FLAGS.problem)
      gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
      iter_data_dir = os.path.join(data_dir, "0random")
      iter_data_dirs.append(iter_data_dir)
      tf.gfile.MakeDirs(iter_data_dir)
      gym_problem.generate_data(iter_data_dir, tmp_dir)
      mean_reward = gym_problem.sum_of_rewards / max(1.0, gym_problem.dones)
      tf.logging.info("%s Step 0.0 random reward: %.4f" % (line, mean_reward))

    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.1 - generate env model. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))

    # Train env model
    FLAGS.data_dir = iter_data_dir
    FLAGS.output_dir = output_dir
    FLAGS.model = hparams.generative_model
    FLAGS.hparams_set = hparams.generative_model_params
    FLAGS.train_steps = hparams.model_train_steps * (iloop + 2)
    FLAGS.eval_steps = 100
    t2t_trainer.main([])

    # Evaluate and dump frames from env model
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.1a - evaluate env model. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    gym_simulated_problem = registry.problem(
        "gym_simulated_discrete_problem_with_agent_on_%s" % hparams.game)
    sim_steps = hparams.simulated_env_generator_num_steps
    gym_simulated_problem.settable_num_steps = sim_steps
    gym_simulated_problem.real_env_problem = gym_problem
    gym_simulated_problem.simulation_random_starts = False
    gym_simulated_problem.intrinsic_reward_scale = 0.
    gym_simulated_problem.generate_data(iter_data_dir, tmp_dir)
    model_reward_accuracy = 0.0
    if gym_simulated_problem.dones != 0:
      n = float(gym_simulated_problem.dones)
      model_reward_accuracy = (
          gym_simulated_problem.successful_episode_reward_predictions / n)
    tf.logging.info("%s Step %d.1a env model reward accuracy: %.4f" % (
        line, iloop, model_reward_accuracy))

    # Train PPO agent
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.2 - train PPO in model env. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))

    # Setup PPO hparams
    ppo_hparams = trainer_lib.create_hparams(hparams.ppo_params,
                                             data_dir=output_dir)
    ppo_epochs_num = hparams.ppo_epochs_num
    ppo_hparams.epochs_num = ppo_epochs_num
    ppo_hparams.simulated_environment = True
    ppo_hparams.simulation_random_starts = hparams.simulation_random_starts
    ppo_hparams.intrinsic_reward_scale = hparams.intrinsic_reward_scale
    ppo_hparams.eval_every_epochs = 0
    ppo_hparams.save_models_every_epochs = ppo_epochs_num
    ppo_hparams.epoch_length = hparams.ppo_epoch_length
    ppo_hparams.num_agents = hparams.ppo_num_agents
    ppo_hparams.problem = gym_problem

    in_graph_wrappers = [
        (TimeLimitWrapper, {"timelimit": hparams.ppo_time_limit}),
        (MaxAndSkipWrapper, {"skip": 4})]
    in_graph_wrappers += gym_problem.in_graph_wrappers
    ppo_hparams.add_hparam("in_graph_wrappers", in_graph_wrappers)

    ppo_dir = generator_utils.make_tmp_dir(dir=data_dir, prefix="ppo_")
    rl_trainer_lib.train(ppo_hparams, gym_simulated_problem.env_name, ppo_dir)
    last_model = ppo_dir

    # Evaluate agent.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.3 - evaluate agent. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    FLAGS.problem = "gym_discrete_problem_with_agent_on_%s" % hparams.game
    FLAGS.agent_policy_path = last_model
    eval_gym_problem = registry.problem(FLAGS.problem)
    eval_gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    eval_gym_problem.eval_runs = 5
    eval_data_dir = os.path.join(data_dir, str(iloop)+"eval")
    iter_data_dirs.append(eval_data_dir)
    tf.gfile.MakeDirs(eval_data_dir)
    eval_gym_problem.generate_data(eval_data_dir, tmp_dir)

    # Generate environment frames.
    time_delta = time.time() - start_time
    tf.logging.info("%s Step %d.4 - generate environment data. Time: %s",
                    line, iloop, str(datetime.timedelta(seconds=time_delta)))
    gym_problem = registry.problem(FLAGS.problem)
    gym_problem.settable_num_steps = hparams.true_env_generator_num_steps
    iter_data_dir = os.path.join(data_dir, str(iloop))
    iter_data_dirs.append(iter_data_dir)
    tf.gfile.MakeDirs(iter_data_dir)
    gym_problem.generate_data(iter_data_dir, tmp_dir)
    combine_world_model_train_data(gym_problem,
                                   iter_data_dir,
                                   iter_data_dirs[:-1])

    mean_reward = 0.0
    if eval_gym_problem.dones != 0:
      mean_reward = eval_gym_problem.sum_of_rewards / float(eval_gym_problem.dones)
    tf.logging.info("%s Step %d mean reward: %.4f" % (line, iloop, mean_reward))

    # Report metrics.
    eval_metrics = {"model_reward_accuracy": model_reward_accuracy,
                    "mean_reward": mean_reward}
    epoch_metrics.append(eval_metrics)

  # Report the evaluation metrics from the final epoch
  return epoch_metrics[-1]
Example 26
            FLAGS.worker_job = '/job:master'
            FLAGS.ps_gpu = FLAGS.number_ps_gpu

            FLAGS.schedule = 'train'
            # FLAGS.schedule='continuous_eval_on_train_data'
            # FLAGS.schedule=continuous_train_and_eval
            # FLAGS.schedule='train_and_evaluate'

            cluster = {'ps': ps_hosts, 'master': worker_hosts}
            os.environ['TF_CONFIG'] = json.dumps({
                'cluster': cluster,
                'task': {
                    'type': 'master',
                    'index': task_index
                },
                'environment': 'cloud',
            })

    FLAGS.problems = PROBLEM
    FLAGS.model = 'transformer'
    FLAGS.hparams_set = 'transformer_librispeech'
    FLAGS.hparams = 'batch_size=%s' % (FLAGS.batch_size)
    FLAGS.train_steps = 2000000
    FLAGS.eval_steps = 100
    FLAGS.save_checkpoints_secs = 100
    FLAGS.output_dir = CHECKPOINTS_PATH
    FLAGS.data_dir = DATA_PATH
    FLAGS.tmp_dir = os.path.expanduser("~/tmp")

    t2t_trainer.main(None)
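For the parameter-server tasks of the same cluster, TF_CONFIG would carry the identical cluster spec but name the task as a ps entry instead of master. A sketch with placeholder host lists:

import json
import os

cluster = {"ps": ["ps0:2222"], "master": ["master0:2222"]}
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": cluster,
    "task": {"type": "ps", "index": 0},
    "environment": "cloud",
})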
Example 27
def main(argv):

  if getattr(FLAGS, "brain_jobs", None):
    FLAGS.worker_job = "/job:%s" % FLAGS.brain_job_name

  return t2t_trainer.main(argv)