Example 1
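The examples below lean on the following imports; the standard-library and third-party ones are evident from the code, while the project-local modules (environments, agents, training loop, and plotting helpers) are assumptions about the surrounding repository rather than part of this listing:

# Assumed imports for the examples in this listing.
import os
import imp
import glob
import time
import pickle
import datetime
import importlib
from shutil import copy
from collections import namedtuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf  # the code below uses the TensorFlow 1.x graph/session API

# Project-local names used below (module paths are guesses, hence commented out):
# from envs import TwoGoalGridWorld, KeyGame
# from agents import TabularREINFORCE, RNNObserver
# from training import reinforce
# from utils import (Result, Stats, get_values, get_kls, get_action_probs,
#                    first_time_to, rate_last_N, mean_last_N)
# from plotting import (plot_episode_stats, plot_value_map, plot_kl_map,
#                       plot_lso_map, plot_state_densities, print_policy)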
def train_alice(alice_config_ext='',
                env_config_ext='',
                exp_name_ext='',
                exp_name_prefix='',
                results_directory=None):

    if results_directory is None: results_directory = os.getcwd() + '/results/'

    config = importlib.import_module('alice_config' + alice_config_ext)
    env_config = importlib.import_module('env_config' + env_config_ext)

    # run training; if NaNs creep in, train again until they don't
    success = False

    while not success:
        # initialize experiment using config.py
        tf.reset_default_graph()
        #global_step = tf.Variable(0, name = "global_step", trainable = False)
        env_type, env_param, env_exp_name_ext = env_config.get_config()
        agent_param, training_param, experiment_name = config.get_config()
        experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
        if env_type == 'grid':
            env = TwoGoalGridWorld(shape=env_param.shape,
                                   r_correct=env_param.r_correct,
                                   r_incorrect=env_param.r_incorrect,
                                   r_step=env_param.r_step,
                                   r_wall=env_param.r_wall,
                                   p_rand=env_param.p_rand,
                                   goal_locs=env_param.goal_locs,
                                   goal_dist=env_param.goal_dist)
        elif env_type == 'key':
            env = KeyGame(shape=env_param.shape,
                          r_correct=env_param.r_correct,
                          r_incorrect=env_param.r_incorrect,
                          r_step=env_param.r_step,
                          r_wall=env_param.r_wall,
                          p_rand=env_param.p_rand,
                          spawn_locs=env_param.spawn_locs,
                          spawn_dist=env_param.spawn_dist,
                          goal_locs=env_param.goal_locs,
                          goal_dist=env_param.goal_dist,
                          key_locs=env_param.key_locs,
                          master_key_locs=env_param.master_key_locs)
        print('Initialized environment.')
        with tf.variable_scope('alice'):
            alice = TabularREINFORCE(
                env,
                use_action_info=agent_param.use_action_info,
                use_state_info=agent_param.use_state_info)
            print('Initialized agent.')
        saver = tf.train.Saver()

        # run experiment
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Beginning training.')
            stats, success = reinforce(
                env=env,
                agent=alice,
                training_steps=training_param.training_steps,
                learning_rate=training_param.learning_rate,
                entropy_scale=training_param.entropy_scale,
                value_scale=training_param.value_scale,
                action_info_scale=training_param.action_info_scale,
                state_info_scale=training_param.state_info_scale,
                state_count_discount=training_param.state_count_discount,
                state_count_smoothing=training_param.state_count_smoothing,
                discount_factor=training_param.discount_factor,
                max_episode_length=training_param.max_episode_length)
            if success:
                print('Finished training.')
                values = get_values(alice, env, sess)  # state X goal
                print('Extracted values.')
                if alice.use_action_info:
                    action_kls = get_kls(alice, env, sess)  # state X goal
                    print('Extracted kls.')
                else:
                    action_kls = None
                if alice.use_state_info:
                    ps_g = stats.state_goal_counts / np.sum(
                        stats.state_goal_counts, axis=0)
                    ps = np.sum(stats.state_goal_counts, axis=1) / np.sum(
                        stats.state_goal_counts)
                    ps = np.expand_dims(ps, axis=1)
                    lso = np.log2(ps_g / ps)  # state X goal
                    print('Extracted log state odds.')
                else:
                    lso = None

                action_probs = get_action_probs(alice, env,
                                                sess)  # state X goal X action
                print('Extracted policy.')
                # save session
                timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M")
                experiment_directory = exp_name_prefix + timestamp + '_' + experiment_name + '/'
                directory = results_directory + experiment_directory
                save_path = saver.save(sess, directory + "alice.ckpt")
                print('')
                print("Model saved in path: %s" % save_path)
            else:
                print('Unsuccessful run - restarting.')
                d = datetime.datetime.now().strftime("%A, %B %d, %I:%M:%S %p")
                with open('error.txt', 'a') as f:
                    f.write("{}: experiment '{}' failed and reran\n".format(
                        d, exp_name_prefix + experiment_name))
                time.sleep(10)

    # save experiment stats
    total_steps, steps_per_reward = first_time_to(stats.episode_lengths,
                                                  stats.episode_rewards)
    result = Result(episode_lengths=stats.episode_lengths,
                    episode_rewards=stats.episode_rewards,
                    episode_modified_rewards=stats.episode_modified_rewards,
                    episode_keys=stats.episode_keys,
                    values=values,
                    action_kls=action_kls,
                    log_state_odds=lso,
                    action_probs=action_probs,
                    state_goal_counts=stats.state_goal_counts,
                    steps_per_reward=steps_per_reward,
                    total_steps=total_steps)
    if not os.path.exists(directory): os.makedirs(directory)
    with open(directory + 'results.pkl', 'wb') as output:
        pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
    print('Saved stats.')

    # copy config file to results directory to ensure experiment repeatable
    copy(os.getcwd() + '/alice_config' + alice_config_ext + '.py',
         directory + 'alice_config.py')
    copy(os.getcwd() + '/env_config' + env_config_ext + '.py',
         directory + 'env_config.py')
    print('Copied configs.')

    # plot experiment and save figures
    FigureSizes = namedtuple('FigureSizes',
                             ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure=(50, 25),
                               tick_label=40,
                               axis_label=50,
                               title=60)

    avg_steps_per_reward, _, action_info, state_info = plot_episode_stats(
        stats, figure_sizes, noshow=True, directory=directory)
    if env_type == 'grid':
        k = 15
        print('')
        print('-' * k + 'VALUES' + '-' * k)
        plot_value_map(values,
                       action_probs,
                       env,
                       figure_sizes,
                       noshow=True,
                       directory=directory)
        if action_kls is not None:
            print('')
            print('-' * k + 'KLS' + '-' * k)
            plot_kl_map(action_kls,
                        action_probs,
                        env,
                        figure_sizes,
                        noshow=True,
                        directory=directory)
        if lso is not None:
            print('')
            print('-' * k + 'LSOS' + '-' * k)
            plot_lso_map(lso,
                         action_probs,
                         env,
                         figure_sizes,
                         noshow=True,
                         directory=directory)
            print('')
            print('-' * k + 'STATE DENSITIES' + '-' * k)
            plot_state_densities(stats.state_goal_counts,
                                 action_probs,
                                 env,
                                 figure_sizes,
                                 noshow=True,
                                 directory=directory)
        print('')
        print('-' * k + 'POLICY' + '-' * k)
        print_policy(action_probs, env)
    print('')
    print('FINISHED')

    return avg_steps_per_reward, action_info, state_info, experiment_name
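A minimal usage sketch (not part of the source): assuming alice_config.py and env_config.py live in the working directory, a single Alice run could be launched as follows, where the *_ext arguments select alternative alice_config<ext>.py / env_config<ext>.py files:

# Hypothetical invocation; the prefix and empty config suffixes are examples only.
avg_steps, action_info, state_info, name = train_alice(
    alice_config_ext='',      # i.e. alice_config.py
    env_config_ext='',        # i.e. env_config.py
    exp_name_prefix='demo_')  # prepended to the timestamped results folder
print("Experiment '{}' averaged {:.1f} steps per reward.".format(name, avg_steps))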
def plot_multiple_experiments(list_of_directories, exp_names_and_colors,
                              figure_sizes, collection_name):
  
  # load results
  results_directory = os.getcwd()+'/results/'
  results = []
  colors = []
  labels = []
  labels_added = set()
  for d in list_of_directories:
    r = pickle.load(open(results_directory+d+'/results.pkl','rb'))
    results.append(r)
    # if directory name contains exp_name, color it with corresponding color
    color_found = False
    for k in exp_names_and_colors.keys():
      if k in d:
        colors.append(exp_names_and_colors[k])
        color_found = True
        if k in labels_added:
          labels.append(None)
        else:
          labels.append(k)
          labels_added.add(k)  # label each experiment type only once so the legend has a single entry per type
        break
    if not color_found: raise ValueError('No names in exp_names_and_colors appeared in {}'.format(d))
    
  # plot rewards vs time and write reward rates to text file
  rate_per_what = 100
  f = open(os.getcwd()+'/results/'+collection_name+'_reward_per_timestep.txt','w')
  f.write('REWARD RATES PER %i TIME STEPS\n' % rate_per_what)
  fig1 = plt.figure(figsize = figure_sizes.figure)
  # plot bob
  f.write("***** BOB *****\n")
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    cumulative_steps = np.cumsum(r.bob.episode_lengths)
    cumulative_rewards = np.cumsum(r.bob.episode_rewards)
    plt.plot(cumulative_steps, cumulative_rewards,
             color = c, linestyle = '-', label = l, linewidth = 8)
    # write reward rates to text file
    N = 10000
    rate = rate_per_what*rate_last_N(cumulative_steps, cumulative_rewards, N = N)
    f.write("'%s': %i (last %i steps)\n" % (d, rate, N))
  # plot alice
  f.write("***** ALICE *****\n")
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    cumulative_steps = np.cumsum(r.alice.episode_lengths)
    cumulative_rewards = np.cumsum(r.alice.episode_rewards)
    plt.plot(cumulative_steps, cumulative_rewards,
             color = c, linestyle = '--', label = None, linewidth = 8)
    # write reward rates to text file
    N = 10000
    rate = rate_per_what*rate_last_N(cumulative_steps, cumulative_rewards, N = N)
    f.write("'%s': %i (last %i steps)\n" % (d, rate, N))
  plt.xlabel("Time Steps", fontsize = figure_sizes.axis_label)
  plt.ylabel("Total Reward", fontsize = figure_sizes.axis_label)
  #plt.xlim((0, np.min(total_steps)))
  #plt.ylim(ymin = 0)
  plt.title("Total Reward over Time", fontsize = figure_sizes.title)
  plt.legend(loc = 'upper left', fontsize = figure_sizes.axis_label)
  plt.tick_params(labelsize = figure_sizes.tick_label)  
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_reward_per_timestep.eps', format='eps')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_reward_per_timestep.pdf', format='pdf')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_reward_per_timestep.png', format='png')
  plt.close(fig1)
  
  # plot smoothed episode lengths over time
  window = 1000
  fig2 = plt.figure(figsize = figure_sizes.figure)
  # plot bob
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    episode_lengths_smoothed = pd.Series(r.bob.episode_lengths).rolling(window, min_periods = window).mean()
    plt.plot(episode_lengths_smoothed,
             color = c, linestyle = '-', label = l, linewidth = 8)
  # plot alice
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    average_episode_length = np.mean(r.alice.episode_lengths)
    plt.axhline(y = average_episode_length,
                color = c, linestyle = '--', label = None, linewidth = 8)
  plt.xlabel("Episode", fontsize = figure_sizes.axis_label)
  plt.ylabel("Episode Length", fontsize = figure_sizes.axis_label)
  plt.title("Episode Length over Time (Smoothed over {} episodes)".format(window), fontsize = figure_sizes.title)
  #plt.xlim((0, np.min(total_steps)))
  plt.ylim(ymin = 0)
  plt.legend(loc = 'upper right', fontsize = figure_sizes.axis_label)
  plt.tick_params(labelsize = figure_sizes.tick_label)  
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_smoothed_episode_lengths.eps', format='eps')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_smoothed_episode_lengths.pdf', format='pdf')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_smoothed_episode_lengths.png', format='png')
  plt.close(fig2)  
  
  # Plot time steps per unit reward (smoothed)
  window = 500
  fig3 = plt.figure(figsize = figure_sizes.figure)
  # plot bob
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
#    total_steps, steps_per_reward = first_time_to(r.bob.episode_lengths, r.bob.episode_rewards)
    total_steps = r.bob.total_steps
    steps_per_reward = r.bob.steps_per_reward
    steps_per_reward_smoothed = pd.Series(steps_per_reward).rolling(window, min_periods = window).mean()
    plt.plot(total_steps, steps_per_reward_smoothed,
             color = c, linestyle = '-', label = l, linewidth = 8)
  # plot alice
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    average_steps_per_reward = np.sum(r.alice.episode_lengths)/np.sum(r.alice.episode_rewards)
    plt.axhline(y = average_steps_per_reward,
                color = c, linestyle = '--', label = None, linewidth = 8)
  plt.xlabel("Time Steps", fontsize = figure_sizes.axis_label)
  plt.ylabel("Time Steps per Reward", fontsize = figure_sizes.axis_label)
#  plt.title("Steps per Reward over Time (Smoothed over approximately {} episodes)".format(window), fontsize = figure_sizes.title) 
  #plt.xlim((0, np.min(total_steps)))
  _, ymax = plt.gca().get_ylim()
  plt.ylim(0, min(2*average_steps_per_reward,ymax))
  plt.legend(loc = 'upper right', fontsize = figure_sizes.axis_label)
  plt.tick_params(labelsize = figure_sizes.tick_label)  
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_steps_per_reward.eps', format='eps')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_steps_per_reward.pdf', format='pdf')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_steps_per_reward.png', format='png')
  plt.close(fig3)

  # Plot time steps per unit reward as % of Alice's
  window = 500
  fig4 = plt.figure(figsize = figure_sizes.figure)
  # plot bob
  for n in range(len(results)-1,-1,-1):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    total_steps, steps_per_reward = first_time_to(r.bob.episode_lengths, r.bob.episode_rewards)
    average_steps_per_reward = np.sum(r.alice.episode_lengths)/np.sum(r.alice.episode_rewards)
    bob_over_alice = steps_per_reward/average_steps_per_reward
    bob_over_alice_smoothed = pd.Series(bob_over_alice).rolling(window, min_periods = window).mean()
    plt.plot(total_steps, bob_over_alice_smoothed,
             color = c, linestyle = '-', label = l, linewidth = 8)
  plt.xlabel("Time Steps", fontsize = figure_sizes.axis_label)
  plt.ylabel("Bob Normalized Episode Length", fontsize = figure_sizes.axis_label)
#  plt.title("Bob Steps per Reward / Alice's Average (Smoothed over ~{} episodes)".format(window), fontsize = figure_sizes.title) 
  #plt.xlim((0, np.min(total_steps)))
  #_, ymax = plt.gca().get_ylim()
  plt.ylim((.95, 2))
  plt.legend(loc = 'upper right', fontsize = figure_sizes.axis_label)
  plt.tick_params(labelsize = figure_sizes.tick_label)  
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_normalized_steps_per_reward.eps', format='eps')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_normalized_steps_per_reward.pdf', format='pdf')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_normalized_steps_per_reward.png', format='png')
  plt.close(fig4) 
  
  # Plot percentage of time Bob beats Alice to the goal
  window = 1000
  fig5 = plt.figure(figsize = figure_sizes.figure)
  # plot bob
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    bob_beats_alice = np.array(r.bob.episode_lengths) < np.array(r.alice.episode_lengths)
    bob_beats_alice[np.array(r.bob.episode_rewards)<0] = 0 # filter out episodes where bob goes to wrong goal
    bob_win_percentage = pd.Series(bob_beats_alice).rolling(window, min_periods = window).mean()
    total_steps = np.cumsum(r.bob.episode_lengths)
    plt.plot(total_steps, bob_win_percentage,
             color = c, linestyle = '-', label = l, linewidth = 8)
  plt.xlabel("Time Steps", fontsize = figure_sizes.axis_label)
  plt.ylabel("% of time Bob beats Alice to goal", fontsize = figure_sizes.axis_label)
#  plt.title("Bob's Win Percentage (Smoothed over ~{} episodes)".format(window), fontsize = figure_sizes.title) 
  #plt.xlim((0, np.min(total_steps)))
  #_, ymax = plt.gca().get_ylim()
  plt.ylim((0, 1))
  plt.legend(loc = 'upper left', fontsize = figure_sizes.axis_label)
  plt.tick_params(labelsize = figure_sizes.tick_label)  
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_bob_win_percentage.eps', format='eps')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_bob_win_percentage.pdf', format='pdf')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_bob_win_percentage.png', format='png')
  plt.close(fig5)
  
  # Plot percentage of time Bob beats or ties Alice to the goal
  window = 1000
  fig6 = plt.figure(figsize = figure_sizes.figure)
  # plot bob
  for n in range(len(results)):
    r = results[n]
    c = colors[n]
    l = labels[n]
    d = list_of_directories[n]
    bob_beats_alice = np.array(r.bob.episode_lengths) <= np.array(r.alice.episode_lengths)
    bob_beats_alice[np.array(r.bob.episode_rewards)<0] = 0 # filter out episodes where bob goes to wrong goal
    bob_win_percentage = pd.Series(bob_beats_alice).rolling(window, min_periods = window).mean()
    total_steps = np.cumsum(r.bob.episode_lengths)
    plt.plot(total_steps, bob_win_percentage,
             color = c, linestyle = '-', label = l, linewidth = 8)
  plt.xlabel("Time Steps", fontsize = figure_sizes.axis_label)
  plt.ylabel("% of time Bob beats/ties Alice to goal", fontsize = figure_sizes.axis_label)
#  plt.title("Bob's Win+Tie Percentage (Smoothed over ~{} episodes)".format(window), fontsize = figure_sizes.title) 
  #plt.xlim((0, np.min(total_steps)))
  #_, ymax = plt.gca().get_ylim()
  plt.ylim((0, 1))
  plt.legend(loc = 'upper left', fontsize = figure_sizes.axis_label)
  plt.tick_params(labelsize = figure_sizes.tick_label)  
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_bob_win_tie_percentage.eps', format='eps')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_bob_win_tie_percentage.pdf', format='pdf')
  plt.savefig(os.getcwd()+'/results/'+collection_name+'_bob_win_tie_percentage.png', format='png')
  plt.close(fig6) 
  
  return
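A sketch of how plot_multiple_experiments might be called (the directory names and color mapping below are illustrative); each listed directory is expected to sit under results/ and contain a results.pkl with .alice and .bob stats, as produced by train_bob in the next example:

# Illustrative call; directories and colors are made up for the sketch.
FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
figure_sizes = FigureSizes(figure=(50, 25), tick_label=40, axis_label=50, title=60)
plot_multiple_experiments(
    list_of_directories=['2019_01_01_1200_action_info_run',
                         '2019_01_02_1200_state_info_run'],
    exp_names_and_colors={'action_info': 'tab:blue', 'state_info': 'tab:orange'},
    figure_sizes=figure_sizes,
    collection_name='info_comparison')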
Example 3
def train_bob(bob_config_ext = '', exp_name_ext = '', exp_name_prefix = '',
              results_directory = None):
  
  if results_directory is None: results_directory = os.getcwd()+'/results/'
  
  # import bob
  config = importlib.import_module('bob_config'+bob_config_ext)
  agent_param, training_param, experiment_name, alice_experiment = config.get_config()
  print('Imported Bob.')
  
  # import alice
  alice_directory = results_directory+alice_experiment+'/'
  alice_config = imp.load_source('alice_config', alice_directory+'alice_config.py')
  alice_agent_param, alice_training_param, alice_experiment_name = alice_config.get_config()
  print('Imported Alice.')
  
  # import and init env
  env_config = imp.load_source('env_config', alice_directory+'env_config.py')
  env_param, env_exp_name_ext = env_config.get_config()
  experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
  env = TwoGoalGridWorld(shape = env_param.shape,
                         r_correct = env_param.r_correct,
                         r_incorrect = env_param.r_incorrect,
                         r_step = env_param.r_step,
                         r_wall = env_param.r_wall,
                         p_rand = env_param.p_rand,
                         goal_locs = env_param.goal_locs,
                         goal_dist = env_param.goal_dist)
  print('Imported environment.')
   
  # run training; if NaNs creep in, train again until they don't
  success = False
  while not success:

    # initialize alice and bob using configs
    tf.reset_default_graph()
    #global_step = tf.Variable(0, name = "global_step", trainable = False)    
    with tf.variable_scope('alice'):  
      alice = TabularREINFORCE(env,
                               use_action_info = alice_agent_param.use_action_info,
                               use_state_info = alice_agent_param.use_state_info)
      alice_saver = tf.train.Saver()
    with tf.variable_scope('bob'):
      bob = RNNObserver(env = env,
                        shared_layer_sizes = agent_param.shared_layer_sizes,
                        policy_layer_sizes = agent_param.policy_layer_sizes,
                        value_layer_sizes = agent_param.value_layer_sizes,
                        use_RNN = agent_param.use_RNN)
      saver = tf.train.Saver()
    print('Initialized Alice and Bob.')
  
    # run experiment
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      alice_saver.restore(sess, alice_directory+'alice.ckpt')
      print('Loaded trained Alice.')
      alice_stats, bob_stats, success = reinforce(env = env,
                                                  alice = alice,
                                                  bob = bob,
                                                  training_steps = training_param.training_steps,
                                                  learning_rate = training_param.learning_rate,
                                                  entropy_scale = training_param.entropy_scale,
                                                  value_scale = training_param.value_scale,
                                                  discount_factor = training_param.discount_factor,
                                                  max_episode_length = training_param.max_episode_length,
                                                  bob_goal_access = training_param.bob_goal_access)
      if success:
        print('Finished training.')
        # save session
        experiment_directory = exp_name_prefix+datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")+'_'+experiment_name+'/'
        directory = results_directory + experiment_directory
        print('Saving results in %s.' % directory)
        if not os.path.exists(directory+'bob/'): os.makedirs(directory+'bob/')
        save_path = saver.save(sess, directory+'bob/bob.ckpt')
        print('Saved bob to %s.' % save_path)
      else:
        print('Unsuccessful run - restarting.')
        d = datetime.datetime.now().strftime("%A, %B %d, %I:%M:%S %p")
        with open('error.txt', 'a') as f:
          f.write("{}: experiment '{}' failed and reran\n".format(d, exp_name_prefix+experiment_name))
  
  # save experiment stats
  print('Building Alice stats.')
  alice_total_steps, alice_steps_per_reward = first_time_to(alice_stats.episode_lengths,
                                                            alice_stats.episode_rewards)
  a = Stats(episode_lengths = alice_stats.episode_lengths,
            episode_rewards = alice_stats.episode_rewards,
            episode_action_kl = alice_stats.episode_action_kl,
            episode_lso = alice_stats.episode_lso,
            state_goal_counts = alice_stats.state_goal_counts,
            steps_per_reward = alice_steps_per_reward,
            total_steps = alice_total_steps)  
  print('Building Bob stats.')
  bob_total_steps, bob_steps_per_reward = first_time_to(bob_stats.episode_lengths,
                                                        bob_stats.episode_rewards)
  b = Stats(episode_lengths = bob_stats.episode_lengths,
            episode_rewards = bob_stats.episode_rewards,
            episode_action_kl = None,
            episode_lso = None,
            state_goal_counts = None,
            steps_per_reward = bob_steps_per_reward,
            total_steps = bob_total_steps)
  
  result = Result(alice = a, bob = b)
  if not os.path.exists(directory): os.makedirs(directory)
  with open(directory+'results.pkl', 'wb') as output:
    # result was built from the locally-defined Stats namedtuples above so pickle can serialize it
    pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
  print('Saved stats.')
  
  # copy config file to results directory to ensure experiment repeatable
  copy(os.getcwd()+'/bob_config'+bob_config_ext+'.py', directory+'bob_config.py')
  copy(os.getcwd()+'/env_config.py', directory)
  copy(alice_directory+'alice_config.py', directory)
  print('Copied configs.')
  
  # copy alice checkpoint used
  if not os.path.exists(directory+'alice/'): os.makedirs(directory+'alice/')
  for file in glob.glob(alice_directory+'alice.ckpt*'):
    copy(file, directory+'alice/')
  copy(alice_directory+'checkpoint', directory+'alice/')
  print('Copied Alice.')
      
  # plot experiment and save figures
  FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
  figure_sizes = FigureSizes(figure = (50,25),
                             tick_label = 40,
                             axis_label = 50,
                             title = 60)
  avg_steps_per_reward, avg_steps_per_reward_alice, action_info, state_info = plot_episode_stats(result,
                                                                                                 figure_sizes,
                                                                                                 noshow = True,
                                                                                                 directory = directory)
  print('Figures saved.')
  print('\nAll results saved in {}'.format(directory))
  
  return avg_steps_per_reward, avg_steps_per_reward_alice, action_info, state_info, experiment_name
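Another hypothetical invocation (not from the source): bob_config.py is expected to name, via its get_config return value, the Alice results directory whose checkpoint and configs should be reused:

# Hypothetical call; assumes a bob_config.py pointing at a finished Alice run.
bob_spr, alice_spr, action_info, state_info, name = train_bob(
    bob_config_ext='',
    exp_name_prefix='demo_')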
Example 4
def plot_episode_stats(stats, figure_sizes, noshow=False, directory=None):

    if type(stats).__name__ == 'Result':
        alice = stats.alice
        stats = stats.bob
        two_agents = True
    else:
        alice = stats
        two_agents = False

    # Plot the episode length over time (smoothed)
    window = 500
    fig0 = plt.figure(figsize=figure_sizes.figure)
    episode_lengths_smoothed = pd.Series(stats.episode_lengths).rolling(
        window, min_periods=window).mean()
    plt.plot(episode_lengths_smoothed, label='bob')
    if two_agents:
        episode_lengths_smoothed = pd.Series(alice.episode_lengths).rolling(
            window, min_periods=window).mean()
        plt.plot(episode_lengths_smoothed, label='alice')
        plt.legend(loc='upper right', fontsize=figure_sizes.axis_label)
    plt.xlabel("Episode", fontsize=figure_sizes.axis_label)
    plt.ylabel("Episode Length", fontsize=figure_sizes.axis_label)
    plt.ylim(ymin=0)
    plt.title(
        "Episode Length over Time (Smoothed over {} episodes)".format(window),
        fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    if directory:
        plt.savefig(directory + 'smoothed_episode_lengths.pdf', format='pdf')
        plt.savefig(directory + 'smoothed_episode_lengths.png', format='png')
    if noshow: plt.close(fig0)
    else: plt.show(fig0)

    # Plot the episode length over time
    fig1 = plt.figure(figsize=figure_sizes.figure)
    plt.plot(stats.episode_lengths, label='bob')
    if two_agents:
        plt.plot(alice.episode_lengths, label='alice')
        plt.legend(loc='upper right', fontsize=figure_sizes.axis_label)
    plt.xlabel("Episode", fontsize=figure_sizes.axis_label)
    plt.ylabel("Episode Length", fontsize=figure_sizes.axis_label)
    plt.ylim(ymin=0)
    plt.title("Episode Length over Time", fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    if directory:
        plt.savefig(directory + 'episode_lengths.pdf', format='pdf')
        plt.savefig(directory + 'episode_lengths.png', format='png')
    if noshow: plt.close(fig1)
    else: plt.show(fig1)

    # Plot the episode reward per episode
    window = 10
    fig2 = plt.figure(figsize=figure_sizes.figure)
    rewards_smoothed = pd.Series(stats.episode_rewards).rolling(
        window, min_periods=window).mean()
    plt.plot(rewards_smoothed, label='bob')
    if two_agents:
        rewards_smoothed = pd.Series(alice.episode_rewards).rolling(
            window, min_periods=window).mean()
        plt.plot(rewards_smoothed, label='alice')
        plt.legend(loc='lower right', fontsize=figure_sizes.axis_label)
    plt.xlabel("Episode", fontsize=figure_sizes.axis_label)
    plt.ylabel("Episode Reward (Smoothed)", fontsize=figure_sizes.axis_label)
    plt.title("Episode Reward over Time (Smoothed over window size {})".format(
        window),
              fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    if directory:
        plt.savefig(directory + 'episode_rewards.pdf', format='pdf')
        plt.savefig(directory + 'episode_rewards.png', format='png')
    if noshow: plt.close(fig2)
    else: plt.show(fig2)

    # Plot the episode reward per time step
    fig3 = plt.figure(figsize=figure_sizes.figure)
    rate_per_what = 100
    N = 10000
    cumulative_steps = np.cumsum(stats.episode_lengths)
    cumulative_rewards = np.cumsum(stats.episode_rewards)
    r = rate_per_what * rate_last_N(cumulative_steps, cumulative_rewards, N=N)
    title = 'Reward per %i steps (last %i steps): %i' % (rate_per_what, N, r)
    plt.plot(cumulative_steps, cumulative_rewards, linewidth=8, label='bob')
    if two_agents:
        cumulative_steps = np.cumsum(alice.episode_lengths)
        cumulative_rewards = np.cumsum(alice.episode_rewards)
        r_alice = rate_per_what * rate_last_N(
            cumulative_steps, cumulative_rewards, N=N)
        title = 'Reward per %i steps (last %i steps): Bob %i, Alice %i' % (
            rate_per_what, N, r, r_alice)
        plt.plot(cumulative_steps,
                 cumulative_rewards,
                 linewidth=8,
                 label='alice')
        plt.legend(loc='upper left', fontsize=figure_sizes.axis_label)
    plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
    plt.ylabel("Total Reward", fontsize=figure_sizes.axis_label)
    plt.title(title, fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    if directory:
        plt.savefig(directory + 'reward_per_timestep.pdf', format='pdf')
        plt.savefig(directory + 'reward_per_timestep.png', format='png')
    if noshow: plt.close(fig3)
    else: plt.show(fig3)

    if alice.episode_action_kl is not None:
        # Plot a rolling estimate of I(action;goal|state)
        window = 1000  # measure in episodes
        fig4 = plt.figure(figsize=figure_sizes.figure)
        cumulative_steps = np.cumsum(alice.episode_lengths)
        info_smoothed = pd.Series(
            np.asarray(alice.episode_action_kl) /
            np.asarray(alice.episode_lengths)).rolling(
                window, min_periods=window).mean()
        N = 10000
        action_info = mean_last_N(cumulative_steps, info_smoothed, N=N)
        plt.plot(cumulative_steps, info_smoothed)
        plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
        plt.ylabel("I(action;goal|state)", fontsize=figure_sizes.axis_label)
        plt.title(
            "Info estimated over sliding window of {} episodes".format(window),
            fontsize=figure_sizes.title)
        plt.tick_params(labelsize=figure_sizes.tick_label)
        if directory:
            plt.savefig(directory + 'action_info.pdf', format='pdf')
            plt.savefig(directory + 'action_info.png', format='png')
        if noshow: plt.close(fig4)
        else: plt.show(fig4)
    else:
        fig4 = None
        action_info = None

    if alice.episode_lso is not None:
        # Plot a rolling estimate of I(state;goal)
        window = 1000  # measure in episodes
        fig5 = plt.figure(figsize=figure_sizes.figure)
        cumulative_steps = np.cumsum(alice.episode_lengths)
        info_smoothed = pd.Series(
            np.asarray(alice.episode_lso) /
            np.asarray(alice.episode_lengths)).rolling(
                window, min_periods=window).mean()
        N = 10000
        state_info = mean_last_N(cumulative_steps, info_smoothed, N=N)
        plt.plot(cumulative_steps, info_smoothed)
        plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
        plt.ylabel("I(state;goal)", fontsize=figure_sizes.axis_label)
        plt.title(
            "Info estimated over sliding window of {} episodes".format(window),
            fontsize=figure_sizes.title)
        plt.tick_params(labelsize=figure_sizes.tick_label)
        if directory:
            plt.savefig(directory + 'state_info.pdf', format='pdf')
            plt.savefig(directory + 'state_info.png', format='png')
        if noshow: plt.close(fig5)
        else: plt.show(fig5)
    else:
        fig5 = None
        state_info = None

    # Plot time steps per unit reward (smoothed)
    window = 500
    fig6 = plt.figure(figsize=figure_sizes.figure)
    total_steps, steps_per_reward = first_time_to(stats.episode_lengths,
                                                  stats.episode_rewards)
    N = 10000
    average_steps_per_reward = mean_last_N(total_steps, steps_per_reward, N=N)
    steps_per_reward_smoothed = pd.Series(steps_per_reward).rolling(
        window, min_periods=window).mean()
    if two_agents: lab = 'bob'
    else: lab = 'alice'
    plt.plot(total_steps,
             steps_per_reward_smoothed,
             color='b',
             label=lab,
             linewidth=8)
    if two_agents:
        average_steps_per_reward_alice = np.sum(
            alice.episode_lengths) / np.sum(alice.episode_rewards)
        plt.axhline(y=average_steps_per_reward_alice,
                    color='r',
                    label='alice',
                    linewidth=8)
        plt.legend(loc='upper right', fontsize=figure_sizes.axis_label)
        _, ymax = plt.gca().get_ylim()
        plt.ylim(0, min(6 * average_steps_per_reward_alice, ymax))
        tit = "Smoothed over ~%i episodes, Mean (last %i steps): Bob %.1f, Alice %.1f" % (
            window, N, average_steps_per_reward,
            average_steps_per_reward_alice)
    else:
        average_steps_per_reward_alice = None
        tit = "Smoothed over ~%i episodes, Mean (last %i steps): %.1f" % (
            window, N, average_steps_per_reward)
    plt.title(tit, fontsize=figure_sizes.title)
    plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
    plt.ylabel("Time Steps per Reward", fontsize=figure_sizes.axis_label)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    if directory:
        plt.savefig(directory + 'steps_per_reward.pdf', format='pdf')
        plt.savefig(directory + 'steps_per_reward.png', format='png')
    if noshow: plt.close(fig6)
    else: plt.show(fig6)

    return average_steps_per_reward, average_steps_per_reward_alice, action_info, state_info
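plot_episode_stats accepts either a Result with .alice and .bob fields (as saved by train_bob) or a single Alice-style stats record. Below is a stand-alone sketch with synthetic data, under the assumption that Stats is the namedtuple used above and that the helpers first_time_to / mean_last_N / rate_last_N are importable:

# Synthetic-data sketch; Stats mirrors the fields accessed by plot_episode_stats.
Stats = namedtuple('Stats', ['episode_lengths', 'episode_rewards',
                             'episode_action_kl', 'episode_lso',
                             'state_goal_counts', 'steps_per_reward',
                             'total_steps'])
FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
fake = Stats(episode_lengths=list(np.random.randint(5, 50, size=5000)),
             episode_rewards=[1.0] * 5000,
             episode_action_kl=None, episode_lso=None,
             state_goal_counts=None, steps_per_reward=None, total_steps=None)
figure_sizes = FigureSizes(figure=(50, 25), tick_label=40, axis_label=50, title=60)
plot_episode_stats(fake, figure_sizes, noshow=True, directory=None)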