def watch(sess_):
    """Render episodes in a separate monitor environment using the current policy (greedy actions)."""
    with sess_.as_default():
        env_monitor = GymEnvironment(env_name=env_name, log_path=None, render=True)
        info_ = env_monitor.reset()[brain_name]
        trainer_monitor = Trainer(ppo_model, sess_, info_, is_continuous,
                                  use_observations, use_states, False)
        # Fetch the scalar step count (not a one-element list) so it can be passed to take_action
        steps_ = sess_.run(ppo_model.global_step)
        print("Starting watcher.")
        while True:
            done = False
            info_ = env_monitor.reset()[brain_name]
            while not done:
                info_ = trainer_monitor.take_action(info_, env_monitor, brain_name,
                                                    steps_, normalize, stochastic=False)
                done = info_.local_done[0]

init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=keep_checkpoints)

with tf.Session() as sess:
    # Instantiate model parameters
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(init)
    steps = sess.run(ppo_model.global_step)
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset(train_mode=train_model)[brain_name]
    trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset(train_mode=train_model)[brain_name]
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name)
        info = new_info
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to tensorboard.
            trainer.write_summary(summary_writer, steps)
        if steps % save_freq == 0 and steps != 0 and train_model:
            # Save Tensorflow model checkpoint (checkpoint filename pattern assumed)
            saver.save(sess, model_path + '/model-' + str(steps) + '.cptk')

with tf.Session() as sess:
    # Instantiate model parameters
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt is None:
            print('The model {0} could not be found. Make sure you specified the right '
                  '--run-path'.format(model_path))
            exit(1)
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(init)
    steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
    trainer = Trainer(ppo_model, sess, info, is_continuous,
                      use_observations, use_states, train_model)
    if train_model:
        trainer.write_text(summary_writer, 'Hyperparameters', options, steps)
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
            trainer.reset_buffers(info, total=True)
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name, steps, normalize)
        info = new_info
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to tensorboard.
            trainer.write_summary(summary_writer, steps)

if load_model:
    print('Loading Model...')
    ckpt = tf.train.get_checkpoint_state(model_path)
    if ckpt is None:
        print('The model {0} could not be found. Make sure you specified the right '
              '--run-path'.format(model_path))
        exit(1)
    saver.restore(sess, ckpt.model_checkpoint_path)
else:
    sess.run(init)
steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
summary_writer = tf.summary.FileWriter(summary_path)
info = env.reset()[brain_name]
trainer = Trainer(ppo_model, sess, info, is_continuous,
                  use_observations, use_states, train_model)
trainer_monitor = Trainer(ppo_model, sess, info, is_continuous,
                          use_observations, use_states, False)
render_started = False
while steps <= max_steps or not train_model:
    if env.global_done:
        info = env.reset()[brain_name]
        trainer.reset_buffers(info, total=True)
    # Decide and take an action
    info = trainer.take_action(info, env, brain_name, steps, normalize_steps, stochastic=True)