Example #1
def play_from_directory(experiment_name):

    cwd = os.getcwd()
    directory = cwd + '/results/' + experiment_name + '/'
    os.chdir(directory)
    #sys.path.append('/results/'+experiment_name)

    # unpickle results
    with open(directory + 'results.pkl', 'rb') as f:
        results = pickle.load(f)

    # import configs
    import alice_config
    alice_agent_param, alice_training_param, alice_experiment_name = alice_config.get_config()
    import env_config
    env_param, _ = env_config.get_config()
    import bob_config
    agent_param, training_param, experiment_name, alice_experiment = bob_config.get_config()

    # initialize experiment using configs
    tf.reset_default_graph()
    #global_step = tf.Variable(0, name = "global_step", trainable = False)
    env = TwoGoalGridWorld(shape=env_param.shape,
                           r_correct=env_param.r_correct,
                           r_incorrect=env_param.r_incorrect,
                           r_step=env_param.r_step,
                           r_wall=env_param.r_wall,
                           p_rand=env_param.p_rand,
                           goal_locs=env_param.goal_locs,
                           goal_dist=env_param.goal_dist)
    with tf.variable_scope('alice'):
        alice = TabularREINFORCE(
            env=env,
            use_action_info=alice_agent_param.use_action_info,
            use_state_info=alice_agent_param.use_state_info)
        #alice_saver = tf.train.Saver()
    with tf.variable_scope('bob'):
        bob = RNNObserver(env=env,
                          shared_layer_sizes=agent_param.shared_layer_sizes,
                          policy_layer_sizes=agent_param.policy_layer_sizes,
                          value_layer_sizes=agent_param.value_layer_sizes,
                          use_RNN=agent_param.use_RNN)
        bob_saver = tf.train.Saver()

    # simulate an episode
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        #alice_saver.restore(sess, directory+'alice/alice.ckpt')
        bob_saver.restore(sess, directory + 'bob/bob.ckpt')
        play(env=env,
             alice=alice,
             bob=bob,
             results=results,
             bob_goal_access=training_param.bob_goal_access,
             gamma=training_param.discount_factor)

    os.chdir(cwd)

    return
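A hedged usage sketch (the experiment name below is hypothetical): play_from_directory expects a results/<experiment_name>/ folder containing results.pkl, the copied alice_config.py / env_config.py / bob_config.py modules, and the saved Bob checkpoint under bob/:

# Hypothetical experiment name; the directory layout is the one the training
# functions below (train_alice / train_bob) produce: results.pkl, config
# copies, and bob/bob.ckpt.
play_from_directory('2019_01_01_1200_bob_experiment')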
Example #2
import sys
if "../" not in sys.path:
    sys.path.append("../")
from envs.TwoGoalGridWorld import TwoGoalGridWorld

UP = 0
RIGHT = 1
DOWN = 2
LEFT = 3
STAY = 4
env = TwoGoalGridWorld(shape=[3, 4],
                       r_correct=1,
                       r_incorrect=-1,
                       r_step=0,
                       r_wall=-.1,
                       p_rand=0,
                       goal_locs=None,
                       goal_dist=None)

#print('move left into wall')
#print(env.P[4][LEFT])
#print(env.P[8][LEFT])
#print('move right from left wall')
#print(env.P[4][RIGHT])
#print(env.P[8][RIGHT])
#print('move right into wall')
#print(env.P[7][RIGHT])
#print(env.P[11][RIGHT])
#print('move left from right wall')
#print(env.P[7][LEFT])
#print(env.P[11][LEFT])
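The commented-out checks above index the transition table as env.P[state][action]. Assuming TwoGoalGridWorld follows the Gym DiscreteEnv convention of (probability, next_state, reward, done) tuples (an assumption, not confirmed by this snippet), the full table can be dumped like this:

# Sketch: dump every transition, assuming env.P[state][action] yields
# Gym-style (prob, next_state, reward, done) tuples and that the 3x4 grid
# has 12 position states.
ACTION_NAMES = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'STAY']
for state in range(12):
    for action, name in enumerate(ACTION_NAMES):
        for prob, next_state, reward, done in env.P[state][action]:
            print("s=%2d %5s -> s'=%2d  p=%.2f  r=%+.2f  done=%s"
                  % (state, name, next_state, prob, reward, done))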
Example #3
def train_alice(alice_config_ext='',
                env_config_ext='',
                exp_name_ext='',
                exp_name_prefix='',
                results_directory=None):

    if results_directory is None: results_directory = os.getcwd() + '/results/'

    config = importlib.import_module('alice_config' + alice_config_ext)
    env_config = importlib.import_module('env_config' + env_config_ext)

    # run training, and if NaNs creep in, train again until they don't
    success = False

    while not success:
        # initialize experiment using config.py
        tf.reset_default_graph()
        #global_step = tf.Variable(0, name = "global_step", trainable = False)
        env_type, env_param, env_exp_name_ext = env_config.get_config()
        agent_param, training_param, experiment_name = config.get_config()
        experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
        if env_type == 'grid':
            env = TwoGoalGridWorld(shape=env_param.shape,
                                   r_correct=env_param.r_correct,
                                   r_incorrect=env_param.r_incorrect,
                                   r_step=env_param.r_step,
                                   r_wall=env_param.r_wall,
                                   p_rand=env_param.p_rand,
                                   goal_locs=env_param.goal_locs,
                                   goal_dist=env_param.goal_dist)
        elif env_type == 'key':
            env = KeyGame(shape=env_param.shape,
                          r_correct=env_param.r_correct,
                          r_incorrect=env_param.r_incorrect,
                          r_step=env_param.r_step,
                          r_wall=env_param.r_wall,
                          p_rand=env_param.p_rand,
                          spawn_locs=env_param.spawn_locs,
                          spawn_dist=env_param.spawn_dist,
                          goal_locs=env_param.goal_locs,
                          goal_dist=env_param.goal_dist,
                          key_locs=env_param.key_locs,
                          master_key_locs=env_param.master_key_locs)
        print('Initialized environment.')
        with tf.variable_scope('alice'):
            alice = TabularREINFORCE(
                env,
                use_action_info=agent_param.use_action_info,
                use_state_info=agent_param.use_state_info)
            print('Initialized agent.')
        saver = tf.train.Saver()

        # run experiment
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Beginning training.')
            stats, success = reinforce(
                env=env,
                agent=alice,
                training_steps=training_param.training_steps,
                learning_rate=training_param.learning_rate,
                entropy_scale=training_param.entropy_scale,
                value_scale=training_param.value_scale,
                action_info_scale=training_param.action_info_scale,
                state_info_scale=training_param.state_info_scale,
                state_count_discount=training_param.state_count_discount,
                state_count_smoothing=training_param.state_count_smoothing,
                discount_factor=training_param.discount_factor,
                max_episode_length=training_param.max_episode_length)
            if success:
                print('Finished training.')
                values = get_values(alice, env, sess)  # state X goal
                print('Extracted values.')
                if alice.use_action_info:
                    action_kls = get_kls(alice, env, sess)  # state X goal
                    print('Extracted kls.')
                else:
                    action_kls = None
                if alice.use_state_info:
                    ps_g = stats.state_goal_counts / np.sum(
                        stats.state_goal_counts, axis=0)
                    ps = np.sum(stats.state_goal_counts, axis=1) / np.sum(
                        stats.state_goal_counts)
                    ps = np.expand_dims(ps, axis=1)
                    lso = np.log2(ps_g / ps)  # state X goal
                    print('Extracted log state odds.')
                else:
                    lso = None

                action_probs = get_action_probs(alice, env,
                                                sess)  # state X goal X action
                print('Extracted policy.')
                # save session
                experiment_directory = (exp_name_prefix +
                                        datetime.datetime.now().strftime("%Y_%m_%d_%H%M") +
                                        '_' + experiment_name + '/')
                directory = results_directory + experiment_directory
                if not os.path.exists(directory): os.makedirs(directory)  # Saver.save needs an existing directory
                save_path = saver.save(sess, directory + "alice.ckpt")
                print('')
                print("Model saved in path: %s" % save_path)
            else:
                print('Unsuccessful run - restarting.')
                d = datetime.datetime.now().strftime("%A, %B %d, %I:%M:%S %p")
                with open('error.txt', 'a') as f:
                    f.write("{}: experiment '{}' failed and reran\n".format(
                        d, exp_name_prefix + experiment_name))
                time.sleep(10)

    # save experiment stats
    total_steps, steps_per_reward = first_time_to(stats.episode_lengths,
                                                  stats.episode_rewards)
    result = Result(episode_lengths=stats.episode_lengths,
                    episode_rewards=stats.episode_rewards,
                    episode_modified_rewards=stats.episode_modified_rewards,
                    episode_keys=stats.episode_keys,
                    values=values,
                    action_kls=action_kls,
                    log_state_odds=lso,
                    action_probs=action_probs,
                    state_goal_counts=stats.state_goal_counts,
                    steps_per_reward=steps_per_reward,
                    total_steps=total_steps)
    if not os.path.exists(directory): os.makedirs(directory)
    with open(directory + 'results.pkl', 'wb') as output:
        pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
    print('Saved stats.')

    # copy config file to results directory to ensure experiment repeatable
    copy(os.getcwd() + '/alice_config' + alice_config_ext + '.py',
         directory + 'alice_config.py')
    copy(os.getcwd() + '/env_config' + env_config_ext + '.py',
         directory + 'env_config.py')
    print('Copied configs.')

    # plot experiment and save figures
    FigureSizes = namedtuple('FigureSizes',
                             ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure=(50, 25),
                               tick_label=40,
                               axis_label=50,
                               title=60)

    avg_steps_per_reward, _, action_info, state_info = plot_episode_stats(
        stats, figure_sizes, noshow=True, directory=directory)
    if env_type == 'grid':
        k = 15
        print('')
        print('-' * k + 'VALUES' + '-' * k)
        plot_value_map(values,
                       action_probs,
                       env,
                       figure_sizes,
                       noshow=True,
                       directory=directory)
        if action_kls is not None:
            print('')
            print('-' * k + 'KLS' + '-' * k)
            plot_kl_map(action_kls,
                        action_probs,
                        env,
                        figure_sizes,
                        noshow=True,
                        directory=directory)
        if lso is not None:
            print('')
            print('-' * k + 'LSOS' + '-' * k)
            plot_lso_map(lso,
                         action_probs,
                         env,
                         figure_sizes,
                         noshow=True,
                         directory=directory)
            print('')
            print('-' * k + 'STATE DENSITIES' + '-' * k)
            plot_state_densities(stats.state_goal_counts,
                                 action_probs,
                                 env,
                                 figure_sizes,
                                 noshow=True,
                                 directory=directory)
        print('')
        print('-' * k + 'POLICY' + '-' * k)
        print_policy(action_probs, env)
    print('')
    print('FINISHED')

    return avg_steps_per_reward, action_info, state_info, experiment_name
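A hedged usage sketch for train_alice (the '_test' config extensions and the prefix are illustrative): it imports alice_config<ext>.py and env_config<ext>.py from the working directory and writes the checkpoint, stats, plots, and config copies under results/:

# Illustrative call; assumes alice_config_test.py and env_config_test.py
# exist next to this script.
avg_steps, action_info, state_info, name = train_alice(
    alice_config_ext='_test',
    env_config_ext='_test',
    exp_name_prefix='debug_')
print('Finished Alice experiment %s' % name)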
Example #4
def plot_from_directory(experiment):  # hypothetical signature; the listing omits the original def line
    os.chdir("..")
    directory = os.getcwd() + '/results/' + experiment + '/'
    with open(directory + 'results.pkl', 'rb') as f:
        r = pickle.load(f)
    values = r.values
    action_probs = r.action_probs
    action_kls = r.action_kls
    lso = r.log_state_odds
    state_goal_counts = r.state_goal_counts
    # load env
    env_config = imp.load_source('env_config', directory + 'env_config.py')
    env_type, env_param, env_exp_name_ext = env_config.get_config()
    if env_type == 'grid':
        env = TwoGoalGridWorld(shape=env_param.shape,
                               r_correct=env_param.r_correct,
                               r_incorrect=env_param.r_incorrect,
                               r_step=env_param.r_step,
                               r_wall=env_param.r_wall,
                               p_rand=env_param.p_rand,
                               goal_locs=env_param.goal_locs,
                               goal_dist=env_param.goal_dist)
    else:
        raise ValueError('Invalid env.')

    # figure sizes
    FigureSizes = namedtuple('FigureSizes',
                             ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure=(50, 25),
                               tick_label=40,
                               axis_label=50,
                               title=60)
    # do the plots
    k = 15
Example #5
def train_bob(bob_config_ext = '', exp_name_ext = '', exp_name_prefix = '',
              results_directory = None):
  
  if results_directory is None: results_directory = os.getcwd()+'/results/'
  
  # import bob
  config = importlib.import_module('bob_config'+bob_config_ext)
  agent_param, training_param, experiment_name, alice_experiment = config.get_config()
  print('Imported Bob.')
  
  # import alice
  alice_directory = results_directory+alice_experiment+'/'
  alice_config = imp.load_source('alice_config', alice_directory+'alice_config.py')
  alice_agent_param, alice_training_param, alice_experiment_name = alice_config.get_config()
  print('Imported Alice.')
  
  # import and init env
  env_config = imp.load_source('env_config', alice_directory+'env_config.py')
  env_param, env_exp_name_ext = env_config.get_config()
  experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
  env = TwoGoalGridWorld(shape = env_param.shape,
                         r_correct = env_param.r_correct,
                         r_incorrect = env_param.r_incorrect,
                         r_step = env_param.r_step,
                         r_wall = env_param.r_wall,
                         p_rand = env_param.p_rand,
                         goal_locs = env_param.goal_locs,
                         goal_dist = env_param.goal_dist)
  print('Imported environment.')
   
  # run training, and if NaNs creep in, train again until they don't
  success = False
  while not success:

    # initialize alice and bob using configs
    tf.reset_default_graph()
    #global_step = tf.Variable(0, name = "global_step", trainable = False)    
    with tf.variable_scope('alice'):  
      alice = TabularREINFORCE(env,
                               use_action_info = alice_agent_param.use_action_info,
                               use_state_info = alice_agent_param.use_state_info)
      alice_saver = tf.train.Saver()
    with tf.variable_scope('bob'):
      bob = RNNObserver(env = env,
                        shared_layer_sizes = agent_param.shared_layer_sizes,
                        policy_layer_sizes = agent_param.policy_layer_sizes,
                        value_layer_sizes = agent_param.value_layer_sizes,
                        use_RNN = agent_param.use_RNN)
      saver = tf.train.Saver()
    print('Initialized Alice and Bob.')
  
    # run experiment
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      alice_saver.restore(sess, alice_directory+'alice.ckpt')
      print('Loaded trained Alice.')
      alice_stats, bob_stats, success = reinforce(env = env,
                                                  alice = alice,
                                                  bob = bob,
                                                  training_steps = training_param.training_steps,
                                                  learning_rate = training_param.learning_rate,
                                                  entropy_scale = training_param.entropy_scale,
                                                  value_scale = training_param.value_scale,
                                                  discount_factor = training_param.discount_factor,
                                                  max_episode_length = training_param.max_episode_length,
                                                  bob_goal_access = training_param.bob_goal_access)
      if success:
        print('Finished training.')
        # save session
        experiment_directory = exp_name_prefix+datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")+'_'+experiment_name+'/'
        directory = results_directory + experiment_directory
        print('Saving results in %s.' % directory)
        if not os.path.exists(directory+'bob/'): os.makedirs(directory+'bob/')
        save_path = saver.save(sess, directory+'bob/bob.ckpt')
        print('Saved bob to %s.' % save_path)
      else:
        print('Unsuccessful run - restarting.')
        d = datetime.datetime.now().strftime("%A, %B %d, %I:%M:%S %p")
        with open('error.txt', 'a') as f:
          f.write("{}: experiment '{}' failed and reran\n".format(d, exp_name_prefix+experiment_name))
  
  # save experiment stats
  print('Building Alice stats.')
  alice_total_steps, alice_steps_per_reward = first_time_to(alice_stats.episode_lengths,
                                                            alice_stats.episode_rewards)
  a = Stats(episode_lengths = alice_stats.episode_lengths,
            episode_rewards = alice_stats.episode_rewards,
            episode_action_kl = alice_stats.episode_action_kl,
            episode_lso = alice_stats.episode_lso,
            state_goal_counts = alice_stats.state_goal_counts,
            steps_per_reward = alice_steps_per_reward,
            total_steps = alice_total_steps)  
  print('Building Bob stats.')
  bob_total_steps, bob_steps_per_reward = first_time_to(bob_stats.episode_lengths,
                                                        bob_stats.episode_rewards)
  b = Stats(episode_lengths = bob_stats.episode_lengths,
            episode_rewards = bob_stats.episode_rewards,
            episode_action_kl = None,
            episode_lso = None,
            state_goal_counts = None,
            steps_per_reward = bob_steps_per_reward,
            total_steps = bob_total_steps)
  
  result = Result(alice = a, bob = b)
  if not os.path.exists(directory): os.makedirs(directory)
  with open(directory+'results.pkl', 'wb') as output:
    # copy to locally-defined Stats objects to make pickle happy
    pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
  print('Saved stats.')
  
  # copy config file to results directory to ensure experiment repeatable
  copy(os.getcwd()+'/bob_config'+bob_config_ext+'.py', directory+'bob_config.py')
  copy(os.getcwd()+'/env_config.py', directory)
  copy(alice_directory+'alice_config.py', directory)
  print('Copied configs.')
  
  # copy alice checkpoint used
  if not os.path.exists(directory+'alice/'): os.makedirs(directory+'alice/')
  for file in glob.glob(alice_directory+'alice.ckpt*'):
    copy(file, directory+'alice/')
  copy(alice_directory+'checkpoint', directory+'alice/')
  print('Copied Alice.')
      
  # plot experiment and save figures
  FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
  figure_sizes = FigureSizes(figure = (50,25),
                             tick_label = 40,
                             axis_label = 50,
                             title = 60)
  avg_steps_per_reward, avg_steps_per_reward_alice, action_info, state_info = plot_episode_stats(result,
                                                                                                 figure_sizes,
                                                                                                 noshow = True,
                                                                                                 directory = directory)
  print('Figures saved.')
  print('\nAll results saved in {}'.format(directory))
  
  return avg_steps_per_reward, avg_steps_per_reward_alice, action_info, state_info, experiment_name
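Finally, a hedged usage sketch for train_bob: it relies on bob_config.py naming an existing trained-Alice results folder via its alice_experiment field, and returns Bob's and Alice's average steps-per-reward together with the action/state information measures (the prefix below is illustrative):

# Illustrative call; bob_config.py's alice_experiment must point at a
# previously saved Alice run under results/.
(avg_steps_bob, avg_steps_alice,
 action_info, state_info, name) = train_bob(exp_name_prefix='debug_')
print('Bob experiment %s finished.' % name)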