def train_alice(alice_config_ext='', env_config_ext='', exp_name_ext='',
                exp_name_prefix='', results_directory=None):
    """Train a tabular REINFORCE agent ("Alice") and save what the run produced.

    Training is repeated on a fresh TensorFlow graph until ``reinforce``
    reports success (the original author's note attributes failed runs to
    NaNs creeping in). The successful run's checkpoint, pickled statistics,
    config files and figures are written to a new timestamped directory.
    Failed attempts are appended to ``error.txt`` in the working directory.

    Args:
        alice_config_ext: Suffix appended to ``'alice_config'`` to pick the
            agent/training config module to import.
        env_config_ext: Suffix appended to ``'env_config'`` to pick the
            environment config module to import.
        exp_name_ext: Text appended to the experiment name.
        exp_name_prefix: Text prepended to the experiment directory name.
        results_directory: Directory under which the experiment directory is
            created; defaults to ``<cwd>/results/``. Paths are built by plain
            string concatenation, so it must end with a path separator.

    Returns:
        ``(avg_steps_per_reward, action_info, state_info, experiment_name)``;
        the first three values come from ``plot_episode_stats``.

    Raises:
        ValueError: If the environment config names an env type other than
            ``'grid'`` or ``'key'``.
    """
    if results_directory is None:
        results_directory = os.getcwd() + '/results/'

    config = importlib.import_module('alice_config' + alice_config_ext)
    env_config = importlib.import_module('env_config' + env_config_ext)

    # Run training; if it reports failure, rebuild everything and try again.
    success = False
    while not success:

        # Initialize the experiment from the config modules.
        tf.reset_default_graph()
        env_type, env_param, env_exp_name_ext = env_config.get_config()
        agent_param, training_param, experiment_name = config.get_config()
        experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
        if env_type == 'grid':
            env = TwoGoalGridWorld(shape=env_param.shape,
                                   r_correct=env_param.r_correct,
                                   r_incorrect=env_param.r_incorrect,
                                   r_step=env_param.r_step,
                                   r_wall=env_param.r_wall,
                                   p_rand=env_param.p_rand,
                                   goal_locs=env_param.goal_locs,
                                   goal_dist=env_param.goal_dist)
        elif env_type == 'key':
            env = KeyGame(shape=env_param.shape,
                          r_correct=env_param.r_correct,
                          r_incorrect=env_param.r_incorrect,
                          r_step=env_param.r_step,
                          r_wall=env_param.r_wall,
                          p_rand=env_param.p_rand,
                          spawn_locs=env_param.spawn_locs,
                          spawn_dist=env_param.spawn_dist,
                          goal_locs=env_param.goal_locs,
                          goal_dist=env_param.goal_dist,
                          key_locs=env_param.key_locs,
                          master_key_locs=env_param.master_key_locs)
        else:
            # Previously this fell through and failed later with a NameError
            # on `env`; fail here with a message that names the cause.
            raise ValueError("Unknown env_type '{}'".format(env_type))
        print('Initialized environment.')
        with tf.variable_scope('alice'):
            alice = TabularREINFORCE(
                env,
                use_action_info=agent_param.use_action_info,
                use_state_info=agent_param.use_state_info)
        print('Initialized agent.')
        saver = tf.train.Saver()

        # Run the experiment.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Beginning training.')
            stats, success = reinforce(
                env=env,
                agent=alice,
                training_steps=training_param.training_steps,
                learning_rate=training_param.learning_rate,
                entropy_scale=training_param.entropy_scale,
                value_scale=training_param.value_scale,
                action_info_scale=training_param.action_info_scale,
                state_info_scale=training_param.state_info_scale,
                state_count_discount=training_param.state_count_discount,
                state_count_smoothing=training_param.state_count_smoothing,
                discount_factor=training_param.discount_factor,
                max_episode_length=training_param.max_episode_length)
            if success:
                print('Finished training.')
                values = get_values(alice, env, sess)  # state X goal
                print('Extracted values.')
                if alice.use_action_info:
                    action_kls = get_kls(alice, env, sess)  # state X goal
                    print('Extracted kls.')
                else:
                    action_kls = None
                if alice.use_state_info:
                    counts = stats.state_goal_counts
                    # Normalize each goal column, and the goal-summed counts.
                    ps_g = counts / np.sum(counts, axis=0)
                    ps = np.sum(counts, axis=1) / np.sum(counts)
                    ps = np.expand_dims(ps, axis=1)
                    lso = np.log2(ps_g / ps)  # state X goal
                    print('Extracted log state odds.')
                else:
                    lso = None
                # state X goal X action
                action_probs = get_action_probs(alice, env, sess)
                print('Extracted policy.')

                # Save the session. The directory is created first, as
                # train_bob does, rather than relying on the saver to
                # create missing parent directories.
                timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M")
                experiment_directory = (exp_name_prefix + timestamp + '_'
                                        + experiment_name + '/')
                directory = results_directory + experiment_directory
                if not os.path.exists(directory):
                    os.makedirs(directory)
                save_path = saver.save(sess, directory + "alice.ckpt")
                print('')
                print("Model saved in path: %s" % save_path)
            else:
                print('Unsucessful run - restarting.')
                d = datetime.datetime.now().strftime(
                    "%A, %B %d, %I:%M:%S %p")
                # Context manager closes the log even if the write fails.
                with open('error.txt', 'a') as error_log:
                    error_log.write(
                        "{}: experiment '{}' failed and reran\n".format(
                            d, exp_name_prefix + experiment_name))
                time.sleep(10)  # pause before the next attempt

    # Save experiment stats. Everything used below was bound by the
    # successful iteration, which is the only way out of the loop.
    total_steps, steps_per_reward = first_time_to(stats.episode_lengths,
                                                  stats.episode_rewards)
    result = Result(episode_lengths=stats.episode_lengths,
                    episode_rewards=stats.episode_rewards,
                    episode_modified_rewards=stats.episode_modified_rewards,
                    episode_keys=stats.episode_keys,
                    values=values,
                    action_kls=action_kls,
                    log_state_odds=lso,
                    action_probs=action_probs,
                    state_goal_counts=stats.state_goal_counts,
                    steps_per_reward=steps_per_reward,
                    total_steps=total_steps)
    with open(directory + 'results.pkl', 'wb') as output:
        pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
    print('Saved stats.')

    # Copy config files to the results directory so the experiment is
    # repeatable.
    copy(os.getcwd() + '/alice_config' + alice_config_ext + '.py',
         directory + 'alice_config.py')
    copy(os.getcwd() + '/env_config' + env_config_ext + '.py',
         directory + 'env_config.py')
    print('Copied configs.')

    # Plot the experiment and save figures.
    FigureSizes = namedtuple('FigureSizes',
                             ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure=(50, 25), tick_label=40,
                               axis_label=50, title=60)
    avg_steps_per_reward, _, action_info, state_info = plot_episode_stats(
        stats, figure_sizes, noshow=True, directory=directory)
    if env_type == 'grid':
        k = 15  # number of dashes on each side of a section banner
        print('')
        print('-' * k + 'VALUES' + '-' * k)
        plot_value_map(values, action_probs, env, figure_sizes,
                       noshow=True, directory=directory)
        if action_kls is not None:
            print('')
            print('-' * k + 'KLS' + '-' * k)
            plot_kl_map(action_kls, action_probs, env, figure_sizes,
                        noshow=True, directory=directory)
        if lso is not None:
            print('')
            print('-' * k + 'LSOS' + '-' * k)
            plot_lso_map(lso, action_probs, env, figure_sizes,
                         noshow=True, directory=directory)
        print('')
        print('-' * k + 'STATE DENSITIES' + '-' * k)
        plot_state_densities(stats.state_goal_counts, action_probs, env,
                             figure_sizes, noshow=True, directory=directory)
        print('')
        print('-' * k + 'POLICY' + '-' * k)
        print_policy(action_probs, env)
    print('')
    print('FINISHED')

    return avg_steps_per_reward, action_info, state_info, experiment_name
def _save_collection_figure(path_stem):
    """Save the current pyplot figure as ``<path_stem>.eps/.pdf/.png``."""
    for ext in ('eps', 'pdf', 'png'):
        plt.savefig(path_stem + '.' + ext, format=ext)


def _label_collection_axes(figure_sizes, xlabel, ylabel, legend_loc):
    """Apply axis labels, legend and tick sizes to the current figure."""
    plt.xlabel(xlabel, fontsize=figure_sizes.axis_label)
    plt.ylabel(ylabel, fontsize=figure_sizes.axis_label)
    plt.legend(loc=legend_loc, fontsize=figure_sizes.axis_label)
    plt.tick_params(labelsize=figure_sizes.tick_label)


def _plot_bob_win_rate(results, colors, labels, figure_sizes, beats, ylabel,
                       path_stem, window=1000):
    """Plot the rolling fraction of episodes in which Bob "beats" Alice.

    ``beats(bob_lengths, alice_lengths)`` returns the per-episode boolean
    outcome (e.g. ``np.less`` for strict wins, ``np.less_equal`` for
    wins or ties).
    """
    fig = plt.figure(figsize=figure_sizes.figure)
    for result, color, label in zip(results, colors, labels):
        bob_beats_alice = beats(np.array(result.bob.episode_lengths),
                                np.array(result.alice.episode_lengths))
        # Filter out episodes where Bob goes to the wrong goal.
        bob_beats_alice[np.array(result.bob.episode_rewards) < 0] = False
        bob_win_percentage = pd.Series(bob_beats_alice).rolling(
            window, min_periods=window).mean()
        total_steps = np.cumsum(result.bob.episode_lengths)
        plt.plot(total_steps, bob_win_percentage, color=color,
                 linestyle='-', label=label, linewidth=8)
    plt.ylim((0, 1))
    _label_collection_axes(figure_sizes, "Time Steps", ylabel, 'upper left')
    _save_collection_figure(path_stem)
    plt.close(fig)


def plot_multiple_experiments(list_of_directories, exp_names_and_colors,
                              figure_sizes, collection_name):
    """Plot several Alice/Bob experiments together and save the figures.

    Each experiment's ``results.pkl`` is loaded from
    ``<cwd>/results/<directory>/``. Six figures (each as eps, pdf and png)
    and one text file of reward rates are written to ``<cwd>/results/``
    with ``collection_name`` as the filename prefix. Bob is drawn with solid
    lines, Alice with dashed lines.

    Args:
        list_of_directories: Sequence of experiment directory names.
        exp_names_and_colors: Mapping from experiment-name substring to plot
            color. The first key found inside a directory name decides that
            experiment's color and legend label.
        figure_sizes: Object with ``figure``, ``tick_label``, ``axis_label``
            and ``title`` attributes.
        collection_name: Prefix for every output file.

    Raises:
        ValueError: If ``list_of_directories`` is empty, or if a directory
            name contains none of the keys of ``exp_names_and_colors``.
    """
    if len(list_of_directories) == 0:
        # Previously an empty list crashed midway with a NameError after
        # some output files had already been written.
        raise ValueError('list_of_directories must not be empty')

    # Load results.
    results_directory = os.getcwd() + '/results/'
    output_stem = results_directory + collection_name
    results = []
    colors = []
    labels = []
    labels_added = set()
    for d in list_of_directories:
        # NOTE: unpickling can execute arbitrary code; only load result
        # files that come from a trusted source.
        with open(results_directory + d + '/results.pkl', 'rb') as pickled:
            results.append(pickle.load(pickled))
        # If the directory name contains an experiment name, use its color.
        for name in exp_names_and_colors:
            if name in d:
                colors.append(exp_names_and_colors[name])
                # Only the first experiment of each name gets a legend entry.
                if name in labels_added:
                    labels.append(None)
                else:
                    labels.append(name)
                    labels_added.add(name)
                break
        else:  # no key matched this directory
            raise ValueError(
                'No names in exp_names_and_colors appeared in {}'.format(d))

    # Plot rewards vs time and write reward rates to a text file.
    rate_per_what = 100
    N = 10000
    with open(output_stem + '_reward_per_timestep.txt', 'w') as rate_file:
        rate_file.write('REWARD RATES PER %i TIME STEPS\n' % rate_per_what)
        fig1 = plt.figure(figsize=figure_sizes.figure)
        rate_file.write("***** BOB *****\n")
        for d, result, color, label in zip(list_of_directories, results,
                                           colors, labels):
            cumulative_steps = np.cumsum(result.bob.episode_lengths)
            cumulative_rewards = np.cumsum(result.bob.episode_rewards)
            plt.plot(cumulative_steps, cumulative_rewards, color=color,
                     linestyle='-', label=label, linewidth=8)
            rate = rate_per_what * rate_last_N(cumulative_steps,
                                               cumulative_rewards, N=N)
            rate_file.write("'%s': %i (last %i steps)\n" % (d, rate, N))
        rate_file.write("***** ALICE *****\n")
        for d, result, color in zip(list_of_directories, results, colors):
            cumulative_steps = np.cumsum(result.alice.episode_lengths)
            cumulative_rewards = np.cumsum(result.alice.episode_rewards)
            plt.plot(cumulative_steps, cumulative_rewards, color=color,
                     linestyle='--', label=None, linewidth=8)
            rate = rate_per_what * rate_last_N(cumulative_steps,
                                               cumulative_rewards, N=N)
            rate_file.write("'%s': %i (last %i steps)\n" % (d, rate, N))
    plt.title("Total Reward over Time", fontsize=figure_sizes.title)
    _label_collection_axes(figure_sizes, "Time Steps", "Total Reward",
                           'upper left')
    _save_collection_figure(output_stem + '_reward_per_timestep')
    plt.close(fig1)

    # Plot smoothed episode lengths over time.
    window = 1000
    fig2 = plt.figure(figsize=figure_sizes.figure)
    for result, color, label in zip(results, colors, labels):
        episode_lengths_smoothed = pd.Series(
            result.bob.episode_lengths).rolling(
                window, min_periods=window).mean()
        plt.plot(episode_lengths_smoothed, color=color, linestyle='-',
                 label=label, linewidth=8)
    for result, color in zip(results, colors):
        average_episode_length = np.mean(result.alice.episode_lengths)
        plt.axhline(y=average_episode_length, color=color, linestyle='--',
                    label=None, linewidth=8)
    plt.title(
        "Episode Length over Time (Smoothed over {} episodes)".format(window),
        fontsize=figure_sizes.title)
    # `bottom` replaces the `ymin` keyword, which newer matplotlib rejects.
    plt.ylim(bottom=0)
    _label_collection_axes(figure_sizes, "Episode", "Episode Length",
                           'upper right')
    _save_collection_figure(output_stem + '_smoothed_episode_lengths')
    plt.close(fig2)

    # Plot time steps per unit reward (smoothed).
    window = 500
    fig3 = plt.figure(figsize=figure_sizes.figure)
    for result, color, label in zip(results, colors, labels):
        steps_per_reward_smoothed = pd.Series(
            result.bob.steps_per_reward).rolling(
                window, min_periods=window).mean()
        plt.plot(result.bob.total_steps, steps_per_reward_smoothed,
                 color=color, linestyle='-', label=label, linewidth=8)
    for result, color in zip(results, colors):
        average_steps_per_reward = (np.sum(result.alice.episode_lengths)
                                    / np.sum(result.alice.episode_rewards))
        plt.axhline(y=average_steps_per_reward, color=color, linestyle='--',
                    label=None, linewidth=8)
    _, ymax = plt.gca().get_ylim()
    # NOTE(review): the cap uses the LAST experiment's Alice average only,
    # as in the original code - confirm this is intended.
    plt.ylim(0, min(2 * average_steps_per_reward, ymax))
    _label_collection_axes(figure_sizes, "Time Steps",
                           "Time Steps per Reward", 'upper right')
    _save_collection_figure(output_stem + '_steps_per_reward')
    plt.close(fig3)

    # Plot time steps per unit reward as a multiple of Alice's average.
    # Experiments are drawn in reverse order, as in the original code.
    window = 500
    fig4 = plt.figure(figsize=figure_sizes.figure)
    for result, color, label in reversed(list(zip(results, colors, labels))):
        total_steps, steps_per_reward = first_time_to(
            result.bob.episode_lengths, result.bob.episode_rewards)
        average_steps_per_reward = (np.sum(result.alice.episode_lengths)
                                    / np.sum(result.alice.episode_rewards))
        bob_over_alice = steps_per_reward / average_steps_per_reward
        bob_over_alice_smoothed = pd.Series(bob_over_alice).rolling(
            window, min_periods=window).mean()
        plt.plot(total_steps, bob_over_alice_smoothed, color=color,
                 linestyle='-', label=label, linewidth=8)
    plt.ylim((.95, 2))
    _label_collection_axes(figure_sizes, "Time Steps",
                           "Bob Normalized Episode Length", 'upper right')
    _save_collection_figure(output_stem + '_normalized_steps_per_reward')
    plt.close(fig4)

    # Plot percentage of time Bob beats Alice to the goal.
    _plot_bob_win_rate(results, colors, labels, figure_sizes, np.less,
                       "% of time Bob beats Alice to goal",
                       output_stem + '_bob_win_percentage')

    # Plot percentage of time Bob beats or ties Alice to the goal.
    _plot_bob_win_rate(results, colors, labels, figure_sizes, np.less_equal,
                       "% of time Bob beats/ties Alice to goal",
                       output_stem + '_bob_win_tie_percentage')

    return
def train_bob(bob_config_ext='', exp_name_ext='', exp_name_prefix='',
              results_directory=None):
    """Train an observer agent ("Bob") alongside a previously trained Alice.

    Alice's checkpoint and the config files stored next to it are loaded from
    ``results_directory + alice_experiment + '/'``, where ``alice_experiment``
    comes from Bob's config. Training is repeated on a fresh TensorFlow graph
    until ``reinforce`` reports success. Bob's checkpoint, the pickled stats
    of both agents, the configs used, a copy of Alice's checkpoint and the
    figures are written to a new timestamped directory. Failed attempts are
    appended to ``error.txt`` in the working directory.

    Args:
        bob_config_ext: Suffix appended to ``'bob_config'`` to pick the
            config module to import.
        exp_name_ext: Text appended to the experiment name.
        exp_name_prefix: Text prepended to the experiment directory name.
        results_directory: Directory holding Alice's results and receiving
            Bob's; defaults to ``<cwd>/results/``. Paths are built by plain
            string concatenation, so it must end with a path separator.

    Returns:
        ``(avg_steps_per_reward, avg_steps_per_reward_alice, action_info,
        state_info, experiment_name)``; the first four values come from
        ``plot_episode_stats``.
    """
    if results_directory is None:
        results_directory = os.getcwd() + '/results/'

    # Import Bob.
    config = importlib.import_module('bob_config' + bob_config_ext)
    (agent_param, training_param, experiment_name,
     alice_experiment) = config.get_config()
    print('Imported Bob.')

    # Import Alice; only her agent parameters are needed here.
    alice_directory = results_directory + alice_experiment + '/'
    alice_config = imp.load_source('alice_config',
                                   alice_directory + 'alice_config.py')
    alice_agent_param, _, _ = alice_config.get_config()
    print('Imported Alice.')

    # Import and initialize the environment.
    env_config = imp.load_source('env_config',
                                 alice_directory + 'env_config.py')
    # train_alice unpacks (env_type, env_param, env_exp_name_ext) from this
    # same config file, so accept both that 3-tuple and the older 2-tuple by
    # taking the last two entries.
    # NOTE(review): any env_type entry is ignored and a TwoGoalGridWorld is
    # always built - confirm Bob is only meant to run on grid environments.
    env_param, env_exp_name_ext = env_config.get_config()[-2:]
    experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
    env = TwoGoalGridWorld(shape=env_param.shape,
                           r_correct=env_param.r_correct,
                           r_incorrect=env_param.r_incorrect,
                           r_step=env_param.r_step,
                           r_wall=env_param.r_wall,
                           p_rand=env_param.p_rand,
                           goal_locs=env_param.goal_locs,
                           goal_dist=env_param.goal_dist)
    print('Imported environment.')

    # Run training; if it reports failure, rebuild everything and try again.
    success = False
    while not success:

        # Initialize Alice and Bob using the configs.
        tf.reset_default_graph()
        with tf.variable_scope('alice'):
            alice = TabularREINFORCE(
                env,
                use_action_info=alice_agent_param.use_action_info,
                use_state_info=alice_agent_param.use_state_info)
            # Built before Bob exists, so it is created while only Alice's
            # variables are in the graph.
            alice_saver = tf.train.Saver()
        with tf.variable_scope('bob'):
            bob = RNNObserver(
                env=env,
                shared_layer_sizes=agent_param.shared_layer_sizes,
                policy_layer_sizes=agent_param.policy_layer_sizes,
                value_layer_sizes=agent_param.value_layer_sizes,
                use_RNN=agent_param.use_RNN)
            saver = tf.train.Saver()
        print('Initialized Alice and Bob.')

        # Run the experiment.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            alice_saver.restore(sess, alice_directory + 'alice.ckpt')
            print('Loaded trained Alice.')
            alice_stats, bob_stats, success = reinforce(
                env=env,
                alice=alice,
                bob=bob,
                training_steps=training_param.training_steps,
                learning_rate=training_param.learning_rate,
                entropy_scale=training_param.entropy_scale,
                value_scale=training_param.value_scale,
                discount_factor=training_param.discount_factor,
                max_episode_length=training_param.max_episode_length,
                bob_goal_access=training_param.bob_goal_access)
            if success:
                print('Finished training.')
                # Save the session.
                timestamp = datetime.datetime.now().strftime(
                    "%Y_%m_%d_%H%M%S")
                experiment_directory = (exp_name_prefix + timestamp + '_'
                                        + experiment_name + '/')
                directory = results_directory + experiment_directory
                print('Saving results in %s.' % directory)
                if not os.path.exists(directory + 'bob/'):
                    os.makedirs(directory + 'bob/')
                save_path = saver.save(sess, directory + 'bob/bob.ckpt')
                print('Saved bob to %s.' % save_path)
            else:
                print('Unsucessful run - restarting.')
                d = datetime.datetime.now().strftime(
                    "%A, %B %d, %I:%M:%S %p")
                # Context manager closes the log even if the write fails.
                with open('error.txt', 'a') as error_log:
                    error_log.write(
                        "{}: experiment '{}' failed and reran\n".format(
                            d, exp_name_prefix + experiment_name))

    # Save experiment stats. The values are copied into locally-defined
    # Stats objects to make pickle happy.
    print('Building Alice stats.')
    alice_total_steps, alice_steps_per_reward = first_time_to(
        alice_stats.episode_lengths, alice_stats.episode_rewards)
    a = Stats(episode_lengths=alice_stats.episode_lengths,
              episode_rewards=alice_stats.episode_rewards,
              episode_action_kl=alice_stats.episode_action_kl,
              episode_lso=alice_stats.episode_lso,
              state_goal_counts=alice_stats.state_goal_counts,
              steps_per_reward=alice_steps_per_reward,
              total_steps=alice_total_steps)
    print('Building Bob stats.')
    bob_total_steps, bob_steps_per_reward = first_time_to(
        bob_stats.episode_lengths, bob_stats.episode_rewards)
    b = Stats(episode_lengths=bob_stats.episode_lengths,
              episode_rewards=bob_stats.episode_rewards,
              episode_action_kl=None,
              episode_lso=None,
              state_goal_counts=None,
              steps_per_reward=bob_steps_per_reward,
              total_steps=bob_total_steps)
    result = Result(alice=a, bob=b)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(directory + 'results.pkl', 'wb') as output:
        pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
    print('Saved stats.')

    # Copy config files to the results directory so the experiment is
    # repeatable.
    copy(os.getcwd() + '/bob_config' + bob_config_ext + '.py',
         directory + 'bob_config.py')
    # Copy the env config that was actually loaded above (the one stored
    # with Alice), not whatever env_config.py is in the working directory.
    copy(alice_directory + 'env_config.py', directory)
    copy(alice_directory + 'alice_config.py', directory)
    print('Copied configs.')

    # Copy the Alice checkpoint that was used.
    if not os.path.exists(directory + 'alice/'):
        os.makedirs(directory + 'alice/')
    for checkpoint_file in glob.glob(alice_directory + 'alice.ckpt*'):
        copy(checkpoint_file, directory + 'alice/')
    copy(alice_directory + 'checkpoint', directory + 'alice/')
    print('Copied Alice.')

    # Plot the experiment and save figures.
    FigureSizes = namedtuple('FigureSizes',
                             ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure=(50, 25), tick_label=40,
                               axis_label=50, title=60)
    (avg_steps_per_reward, avg_steps_per_reward_alice,
     action_info, state_info) = plot_episode_stats(
         result, figure_sizes, noshow=True, directory=directory)
    print('Figures saved.')
    print('\nAll results saved in {}'.format(directory))

    return (avg_steps_per_reward, avg_steps_per_reward_alice, action_info,
            state_info, experiment_name)
def _finish_episode_figure(fig, name, noshow, directory):
    """Save the current figure (if ``directory`` is set), then close or show.

    Files are written as ``directory + name`` with ``.pdf`` and ``.png``
    extensions.
    """
    if directory:
        plt.savefig(directory + name + '.pdf', format='pdf')
        plt.savefig(directory + name + '.png', format='png')
    if noshow:
        plt.close(fig)
    else:
        # pyplot.show takes no figure argument; the original passed the
        # figure positionally, which current matplotlib rejects.
        plt.show()


def plot_episode_stats(stats, figure_sizes, noshow=False, directory=None):
    """Plot per-episode training statistics for one or two agents.

    Args:
        stats: Either a single agent's stats object, or an object whose type
            is named ``Result`` carrying ``alice`` and ``bob`` stats. In the
            two-agent case the main curves are Bob's and Alice is overlaid;
            the information plots always use Alice's stats.
        figure_sizes: Object with ``figure``, ``tick_label``, ``axis_label``
            and ``title`` attributes.
        noshow: If true, close each figure instead of showing it.
        directory: If set, each figure is saved there as pdf and png. It is
            concatenated with the filename, so it must end with a separator.

    Returns:
        ``(average_steps_per_reward, average_steps_per_reward_alice,
        action_info, state_info)``. The second value is ``None`` for a single
        agent; the last two are ``None`` when Alice's ``episode_action_kl`` /
        ``episode_lso`` are ``None``.
    """
    if type(stats).__name__ == 'Result':
        alice = stats.alice
        stats = stats.bob
        two_agents = True
    else:
        alice = stats
        two_agents = False

    # Plot the episode length over time (smoothed).
    window = 500
    fig0 = plt.figure(figsize=figure_sizes.figure)
    episode_lengths_smoothed = pd.Series(stats.episode_lengths).rolling(
        window, min_periods=window).mean()
    plt.plot(episode_lengths_smoothed, label='bob')
    if two_agents:
        episode_lengths_smoothed = pd.Series(alice.episode_lengths).rolling(
            window, min_periods=window).mean()
        plt.plot(episode_lengths_smoothed, label='alice')
        plt.legend(loc='upper right', fontsize=figure_sizes.axis_label)
    plt.xlabel("Episode", fontsize=figure_sizes.axis_label)
    plt.ylabel("Episode Length", fontsize=figure_sizes.axis_label)
    # `bottom` replaces the `ymin` keyword, which newer matplotlib rejects.
    plt.ylim(bottom=0)
    plt.title(
        "Episode Length over Time (Smoothed over {} episodes)".format(window),
        fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    _finish_episode_figure(fig0, 'smoothed_episode_lengths', noshow,
                           directory)

    # Plot the episode length over time.
    fig1 = plt.figure(figsize=figure_sizes.figure)
    plt.plot(stats.episode_lengths, label='bob')
    if two_agents:
        plt.plot(alice.episode_lengths, label='alice')
        plt.legend(loc='upper right', fontsize=figure_sizes.axis_label)
    plt.xlabel("Episode", fontsize=figure_sizes.axis_label)
    plt.ylabel("Episode Length", fontsize=figure_sizes.axis_label)
    plt.ylim(bottom=0)
    plt.title("Episode Length over Time", fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    _finish_episode_figure(fig1, 'episode_lengths', noshow, directory)

    # Plot the episode reward per episode.
    window = 10
    fig2 = plt.figure(figsize=figure_sizes.figure)
    rewards_smoothed = pd.Series(stats.episode_rewards).rolling(
        window, min_periods=window).mean()
    plt.plot(rewards_smoothed, label='bob')
    if two_agents:
        rewards_smoothed = pd.Series(alice.episode_rewards).rolling(
            window, min_periods=window).mean()
        plt.plot(rewards_smoothed, label='alice')
        plt.legend(loc='lower right', fontsize=figure_sizes.axis_label)
    plt.xlabel("Episode", fontsize=figure_sizes.axis_label)
    plt.ylabel("Episode Reward (Smoothed)", fontsize=figure_sizes.axis_label)
    plt.title("Episode Reward over Time (Smoothed over window size {})".format(
        window), fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    _finish_episode_figure(fig2, 'episode_rewards', noshow, directory)

    # Plot the cumulative reward against cumulative time steps.
    fig3 = plt.figure(figsize=figure_sizes.figure)
    rate_per_what = 100
    N = 10000
    cumulative_steps = np.cumsum(stats.episode_lengths)
    cumulative_rewards = np.cumsum(stats.episode_rewards)
    r = rate_per_what * rate_last_N(cumulative_steps, cumulative_rewards, N=N)
    title = 'Reward per %i steps (last %i steps): %i' % (rate_per_what, N, r)
    plt.plot(cumulative_steps, cumulative_rewards, linewidth=8, label='bob')
    if two_agents:
        cumulative_steps = np.cumsum(alice.episode_lengths)
        cumulative_rewards = np.cumsum(alice.episode_rewards)
        r_alice = rate_per_what * rate_last_N(
            cumulative_steps, cumulative_rewards, N=N)
        title = 'Reward per %i steps (last %i steps): Bob %i, Alice %i' % (
            rate_per_what, N, r, r_alice)
        plt.plot(cumulative_steps, cumulative_rewards, linewidth=8,
                 label='alice')
        plt.legend(loc='upper left', fontsize=figure_sizes.axis_label)
    plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
    plt.ylabel("Total Reward", fontsize=figure_sizes.axis_label)
    plt.title(title, fontsize=figure_sizes.title)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    _finish_episode_figure(fig3, 'reward_per_timestep', noshow, directory)

    if alice.episode_action_kl is not None:
        # Plot a rolling estimate of I(action;goal|state).
        window = 1000  # measured in episodes
        fig4 = plt.figure(figsize=figure_sizes.figure)
        cumulative_steps = np.cumsum(alice.episode_lengths)
        # Per-episode KL divided by episode length, then smoothed.
        info_smoothed = pd.Series(
            np.asarray(alice.episode_action_kl)
            / np.asarray(alice.episode_lengths)).rolling(
                window, min_periods=window).mean()
        N = 10000
        action_info = mean_last_N(cumulative_steps, info_smoothed, N=N)
        plt.plot(cumulative_steps, info_smoothed)
        plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
        plt.ylabel("I(action;goal|state)", fontsize=figure_sizes.axis_label)
        plt.title(
            "Info estimated over sliding window of {} episodes".format(window),
            fontsize=figure_sizes.title)
        plt.tick_params(labelsize=figure_sizes.tick_label)
        _finish_episode_figure(fig4, 'action_info', noshow, directory)
    else:
        action_info = None

    if alice.episode_lso is not None:
        # Plot a rolling estimate of I(state;goal).
        window = 1000  # measured in episodes
        fig5 = plt.figure(figsize=figure_sizes.figure)
        cumulative_steps = np.cumsum(alice.episode_lengths)
        # Per-episode log state odds divided by episode length, smoothed.
        info_smoothed = pd.Series(
            np.asarray(alice.episode_lso)
            / np.asarray(alice.episode_lengths)).rolling(
                window, min_periods=window).mean()
        N = 10000
        state_info = mean_last_N(cumulative_steps, info_smoothed, N=N)
        plt.plot(cumulative_steps, info_smoothed)
        plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
        plt.ylabel("I(state;goal)", fontsize=figure_sizes.axis_label)
        plt.title(
            "Info estimated over sliding window of {} episodes".format(window),
            fontsize=figure_sizes.title)
        plt.tick_params(labelsize=figure_sizes.tick_label)
        _finish_episode_figure(fig5, 'state_info', noshow, directory)
    else:
        state_info = None

    # Plot time steps per unit reward (smoothed).
    window = 500
    fig6 = plt.figure(figsize=figure_sizes.figure)
    total_steps, steps_per_reward = first_time_to(stats.episode_lengths,
                                                  stats.episode_rewards)
    N = 10000
    average_steps_per_reward = mean_last_N(total_steps, steps_per_reward, N=N)
    steps_per_reward_smoothed = pd.Series(steps_per_reward).rolling(
        window, min_periods=window).mean()
    lab = 'bob' if two_agents else 'alice'
    plt.plot(total_steps, steps_per_reward_smoothed, color='b', label=lab,
             linewidth=8)
    if two_agents:
        # Alice is drawn as one horizontal line at her overall average.
        average_steps_per_reward_alice = np.sum(
            alice.episode_lengths) / np.sum(alice.episode_rewards)
        plt.axhline(y=average_steps_per_reward_alice, color='r',
                    label='alice', linewidth=8)
        plt.legend(loc='upper right', fontsize=figure_sizes.axis_label)
        _, ymax = plt.gca().get_ylim()
        plt.ylim(0, min(6 * average_steps_per_reward_alice, ymax))
        tit = ("Smoothed over ~%i episodes, Mean (last %i steps): "
               "Bob %.1f, Alice %.1f") % (
                   window, N, average_steps_per_reward,
                   average_steps_per_reward_alice)
    else:
        average_steps_per_reward_alice = None
        tit = "Smoothed over ~%i episodes, Mean (last %i steps): %.1f" % (
            window, N, average_steps_per_reward)
    plt.title(tit, fontsize=figure_sizes.title)
    plt.xlabel("Time Steps", fontsize=figure_sizes.axis_label)
    plt.ylabel("Time Steps per Reward", fontsize=figure_sizes.axis_label)
    plt.tick_params(labelsize=figure_sizes.tick_label)
    _finish_episode_figure(fig6, 'steps_per_reward', noshow, directory)

    return (average_steps_per_reward, average_steps_per_reward_alice,
            action_info, state_info)