Exemple #1
0
def watch(sess_):
    """Run a render-only evaluation loop with the current policy.

    Opens a rendering GymEnvironment and replays episodes forever using a
    non-training Trainer so the learned behaviour can be observed.
    Never returns.

    Args:
        sess_: active tf.Session holding the trained ppo_model weights.
    """
    with sess_.as_default():
        env_monitor = GymEnvironment(env_name=env_name,
                                     log_path=None,
                                     render=True)
        info_ = env_monitor.reset()[brain_name]
        # Final argument False: this Trainer only acts, it never trains.
        trainer_monitor = Trainer(ppo_model, sess_, info_, is_continuous,
                                  use_observations, use_states, False)
        # Fetch the step count as a scalar. The original wrapped the fetch in
        # a list, making `steps_` a one-element list, which is inconsistent
        # with the sibling scripts that pass a scalar step into take_action.
        steps_ = sess_.run(ppo_model.global_step)

        print("Starting watcher.")
        while True:
            done = False
            info_ = env_monitor.reset()[brain_name]
            while not done:
                # stochastic=False: act greedily for a clean demonstration.
                info_ = trainer_monitor.take_action(info_,
                                                    env_monitor,
                                                    brain_name,
                                                    steps_,
                                                    normalize,
                                                    stochastic=False)
                done = info_.local_done[0]
Exemple #2
0
# Op that initialises all TF variables when training from scratch.
init = tf.global_variables_initializer()
# Checkpoint saver; retains at most `keep_checkpoints` recent checkpoints
# (keep_checkpoints is defined elsewhere in the script).
saver = tf.train.Saver(max_to_keep=keep_checkpoints)

with tf.Session() as sess:
    # Instantiate model parameters
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        # NOTE(review): if no checkpoint exists ckpt is None and the next
        # line raises AttributeError — confirm that crash is acceptable here.
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(init)
    # Current global training step as tracked inside the graph.
    steps = sess.run(ppo_model.global_step)
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset(train_mode=train_model)[brain_name]
    trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations,
                      use_states)
    # Main PPO loop; when not training (train_model False) it runs forever.
    # NOTE(review): `steps` is not updated in the visible loop body —
    # presumably refreshed past this excerpt (e.g. after update_model); verify.
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset(train_mode=train_model)[brain_name]
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name)
        info = new_info
        # Compute advantages/returns for finished agent trajectories.
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']
               ) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to tensorboard.
            trainer.write_summary(summary_writer, steps)
        if steps % save_freq == 0 and steps != 0 and train_model:
Exemple #3
0
with tf.Session() as sess:
    # Instantiate model parameters: restore a checkpoint or initialise fresh.
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        # PEP 8: compare against None with `is`, not `==`.
        if ckpt is None:
            print('The model {0} could not be found. Make sure you specified the right '
                  '--run-path'.format(model_path))
        # NOTE(review): when ckpt is None the line below still dereferences
        # it and raises AttributeError; an explicit abort may be intended.
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(init)
    # Current global step and last recorded mean reward from the graph.
    steps, last_reward = sess.run([ppo_model.global_step, ppo_model.last_reward])
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
    trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states, train_model)
    if train_model:
        trainer.write_text(summary_writer, 'Hyperparameters', options, steps)
    # Main PPO loop; when not training (train_model False) it runs forever.
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset(train_mode=train_model, progress=get_progress())[brain_name]
            trainer.reset_buffers(info, total=True)
        # Decide and take an action
        new_info = trainer.take_action(info, env, brain_name, steps, normalize)
        info = new_info
        # Compute advantages/returns for finished agent trajectories.
        trainer.process_experiences(info, time_horizon, gamma, lambd)
        if len(trainer.training_buffer['actions']) > buffer_size and train_model:
            # Perform gradient descent with experience buffer
            trainer.update_model(batch_size, num_epoch)
        if steps % summary_freq == 0 and steps != 0 and train_model:
            # Write training statistics to tensorboard.
Exemple #4
0
    # Load an existing checkpoint or initialise fresh model parameters.
    if load_model:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt is None:
            print(
                'The model {0} could not be found. Make sure you specified the right --run-path'
                .format(model_path))
        # NOTE(review): if ckpt is None the line below still dereferences it
        # and raises AttributeError — confirm an early abort is intended.
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(init)

    # Current global step and last mean reward recorded in the graph.
    steps, last_reward = sess.run(
        [ppo_model.global_step, ppo_model.last_reward])
    summary_writer = tf.summary.FileWriter(summary_path)
    info = env.reset()[brain_name]
    # One trainer that learns (final arg train_model) and a second,
    # non-training trainer used only for monitoring/rendering episodes.
    trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations,
                      use_states, train_model)
    trainer_monitor = Trainer(ppo_model, sess, info, is_continuous,
                              use_observations, use_states, False)
    render_started = False

    # Main loop: train until max_steps, or run forever when not training.
    while steps <= max_steps or not train_model:
        if env.global_done:
            info = env.reset()[brain_name]
            trainer.reset_buffers(info, total=True)
        # Decide and take an action
        # stochastic=True: sample from the policy for exploration.
        info = trainer.take_action(info,
                                   env,
                                   brain_name,
                                   steps,
                                   normalize_steps,
                                   stochastic=True)