def play_from_directory(experiment_name):

    cwd = os.getcwd()
    directory = cwd + '/results/' + experiment_name + '/'
    os.chdir(directory)
    #sys.path.append('/results/'+experiment_name)

    # unpickle results
    results = pickle.load(open(directory + 'results.pkl', 'rb'))

    # import configs
    import alice_config
    alice_agent_param, alice_training_param, alice_experiment_name = alice_config.get_config()
    import env_config
    env_param, _ = env_config.get_config()
    import bob_config
    agent_param, training_param, experiment_name, alice_experiment = bob_config.get_config()

    # initialize experiment using configs
    tf.reset_default_graph()
    #global_step = tf.Variable(0, name = "global_step", trainable = False)
    env = TwoGoalGridWorld(shape=env_param.shape,
                           r_correct=env_param.r_correct,
                           r_incorrect=env_param.r_incorrect,
                           r_step=env_param.r_step,
                           r_wall=env_param.r_wall,
                           p_rand=env_param.p_rand,
                           goal_locs=env_param.goal_locs,
                           goal_dist=env_param.goal_dist)
    with tf.variable_scope('alice'):
        alice = TabularREINFORCE(env=env,
                                 use_action_info=alice_agent_param.use_action_info,
                                 use_state_info=alice_agent_param.use_state_info)
    #alice_saver = tf.train.Saver()
    with tf.variable_scope('bob'):
        bob = RNNObserver(env=env,
                          shared_layer_sizes=agent_param.shared_layer_sizes,
                          policy_layer_sizes=agent_param.policy_layer_sizes,
                          value_layer_sizes=agent_param.value_layer_sizes,
                          use_RNN=agent_param.use_RNN)
    bob_saver = tf.train.Saver()

    # simulate an episode
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        #alice_saver.restore(sess, directory+'alice/alice.ckpt')
        bob_saver.restore(sess, directory + 'bob/bob.ckpt')
        play(env=env,
             alice=alice,
             bob=bob,
             results=results,
             bob_goal_access=training_param.bob_goal_access,
             gamma=training_param.discount_factor)

    os.chdir(cwd)
    return
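# Example usage (illustrative sketch only): replaying a saved Bob run from its
# results folder. The experiment name below is hypothetical; it should name a
# directory under ./results/ that contains results.pkl, the copied config files,
# and the bob/ checkpoint that play_from_directory expects.
#
# play_from_directory('2019_01_01_0000_bob_grid')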
import sys
if "../" not in sys.path:
    sys.path.append("../")
from envs.TwoGoalGridWorld import TwoGoalGridWorld

UP = 0
RIGHT = 1
DOWN = 2
LEFT = 3
STAY = 4

env = TwoGoalGridWorld(shape=[3, 4],
                       r_correct=1,
                       r_incorrect=-1,
                       r_step=0,
                       r_wall=-.1,
                       p_rand=0,
                       goal_locs=None,
                       goal_dist=None)

#print('move left into wall')
#print(env.P[4][LEFT])
#print(env.P[8][LEFT])
#print('move right from left wall')
#print(env.P[4][RIGHT])
#print(env.P[8][RIGHT])
#print('move right into wall')
#print(env.P[7][LEFT])
#print(env.P[11][LEFT])
#print('move left from right wall')
#print(env.P[7][RIGHT])
#print(env.P[11][RIGHT])
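# A small sketch of how the commented-out checks above could be made more readable.
# It assumes env.P follows the usual gym DiscreteEnv convention, i.e. that
# env.P[state][action] is a list of (prob, next_state, reward, done) tuples; if
# TwoGoalGridWorld stores transitions differently, adjust the unpacking below.
def describe_transition(env, state, action, label):
    print(label)
    for prob, next_state, reward, done in env.P[state][action]:
        print('  p=%.2f -> state %d, reward %.2f, done=%s' % (prob, next_state, reward, done))

#describe_transition(env, 4, LEFT, 'move left into wall from state 4')
#describe_transition(env, 4, RIGHT, 'move right from left-wall state 4')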
def train_alice(alice_config_ext='', env_config_ext='', exp_name_ext='',
                exp_name_prefix='', results_directory=None):

    if results_directory is None:
        results_directory = os.getcwd() + '/results/'
    config = importlib.import_module('alice_config' + alice_config_ext)
    env_config = importlib.import_module('env_config' + env_config_ext)

    # run training, and if NaNs creep in, train again until they don't
    success = False
    while not success:

        # initialize experiment using config.py
        tf.reset_default_graph()
        #global_step = tf.Variable(0, name = "global_step", trainable = False)
        env_type, env_param, env_exp_name_ext = env_config.get_config()
        agent_param, training_param, experiment_name = config.get_config()
        experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
        if env_type == 'grid':
            env = TwoGoalGridWorld(shape=env_param.shape,
                                   r_correct=env_param.r_correct,
                                   r_incorrect=env_param.r_incorrect,
                                   r_step=env_param.r_step,
                                   r_wall=env_param.r_wall,
                                   p_rand=env_param.p_rand,
                                   goal_locs=env_param.goal_locs,
                                   goal_dist=env_param.goal_dist)
        elif env_type == 'key':
            env = KeyGame(shape=env_param.shape,
                          r_correct=env_param.r_correct,
                          r_incorrect=env_param.r_incorrect,
                          r_step=env_param.r_step,
                          r_wall=env_param.r_wall,
                          p_rand=env_param.p_rand,
                          spawn_locs=env_param.spawn_locs,
                          spawn_dist=env_param.spawn_dist,
                          goal_locs=env_param.goal_locs,
                          goal_dist=env_param.goal_dist,
                          key_locs=env_param.key_locs,
                          master_key_locs=env_param.master_key_locs)
        print('Initialized environment.')
        with tf.variable_scope('alice'):
            alice = TabularREINFORCE(env,
                                     use_action_info=agent_param.use_action_info,
                                     use_state_info=agent_param.use_state_info)
        print('Initialized agent.')
        saver = tf.train.Saver()

        # run experiment
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print('Beginning training.')
            stats, success = reinforce(env=env,
                                       agent=alice,
                                       training_steps=training_param.training_steps,
                                       learning_rate=training_param.learning_rate,
                                       entropy_scale=training_param.entropy_scale,
                                       value_scale=training_param.value_scale,
                                       action_info_scale=training_param.action_info_scale,
                                       state_info_scale=training_param.state_info_scale,
                                       state_count_discount=training_param.state_count_discount,
                                       state_count_smoothing=training_param.state_count_smoothing,
                                       discount_factor=training_param.discount_factor,
                                       max_episode_length=training_param.max_episode_length)
            if success:
                print('Finished training.')
                values = get_values(alice, env, sess)  # state X goal
                print('Extracted values.')
                if alice.use_action_info:
                    action_kls = get_kls(alice, env, sess)  # state X goal
                    print('Extracted kls.')
                else:
                    action_kls = None
                if alice.use_state_info:
                    ps_g = stats.state_goal_counts / np.sum(stats.state_goal_counts, axis=0)
                    ps = np.sum(stats.state_goal_counts, axis=1) / np.sum(stats.state_goal_counts)
                    ps = np.expand_dims(ps, axis=1)
                    lso = np.log2(ps_g / ps)  # state X goal
                    print('Extracted log state odds.')
                else:
                    lso = None
                action_probs = get_action_probs(alice, env, sess)  # state X goal X action
                print('Extracted policy.')

                # save session
                experiment_directory = exp_name_prefix + datetime.datetime.now().strftime("%Y_%m_%d_%H%M") + '_' + experiment_name + '/'
                directory = results_directory + experiment_directory
                # make sure the experiment directory exists before writing the checkpoint
                if not os.path.exists(directory):
                    os.makedirs(directory)
                save_path = saver.save(sess, directory + "alice.ckpt")
                print('')
                print("Model saved in path: %s" % save_path)
            else:
                print('Unsuccessful run - restarting.')
                f = open('error.txt', 'a')
                d = datetime.datetime.now().strftime("%A, %B %d, %I:%M:%S %p")
                f.write("{}: experiment '{}' failed and reran\n".format(d, exp_name_prefix + experiment_name))
                f.close()
                time.sleep(10)

    # save experiment stats
    total_steps, steps_per_reward = first_time_to(stats.episode_lengths,
                                                  stats.episode_rewards)
    result = Result(episode_lengths=stats.episode_lengths,
                    episode_rewards=stats.episode_rewards,
                    episode_modified_rewards=stats.episode_modified_rewards,
                    episode_keys=stats.episode_keys,
                    values=values,
                    action_kls=action_kls,
                    log_state_odds=lso,
                    action_probs=action_probs,
                    state_goal_counts=stats.state_goal_counts,
                    steps_per_reward=steps_per_reward,
                    total_steps=total_steps)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(directory + 'results.pkl', 'wb') as output:
        pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
    print('Saved stats.')

    # copy config files to results directory to ensure experiment is repeatable
    copy(os.getcwd() + '/alice_config' + alice_config_ext + '.py', directory + 'alice_config.py')
    copy(os.getcwd() + '/env_config' + env_config_ext + '.py', directory + 'env_config.py')
    print('Copied configs.')

    # plot experiment and save figures
    FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure=(50, 25),
                               tick_label=40,
                               axis_label=50,
                               title=60)
    avg_steps_per_reward, _, action_info, state_info = plot_episode_stats(stats, figure_sizes,
                                                                          noshow=True,
                                                                          directory=directory)
    if env_type == 'grid':
        k = 15
        print('')
        print('-' * k + 'VALUES' + '-' * k)
        plot_value_map(values, action_probs, env, figure_sizes, noshow=True, directory=directory)
        if action_kls is not None:
            print('')
            print('-' * k + 'KLS' + '-' * k)
            plot_kl_map(action_kls, action_probs, env, figure_sizes, noshow=True, directory=directory)
        if lso is not None:
            print('')
            print('-' * k + 'LSOS' + '-' * k)
            plot_lso_map(lso, action_probs, env, figure_sizes, noshow=True, directory=directory)
        print('')
        print('-' * k + 'STATE DENSITIES' + '-' * k)
        plot_state_densities(stats.state_goal_counts, action_probs, env, figure_sizes, noshow=True, directory=directory)
        print('')
        print('-' * k + 'POLICY' + '-' * k)
        print_policy(action_probs, env)

    print('')
    print('FINISHED')
    return avg_steps_per_reward, action_info, state_info, experiment_name
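# Example invocation (a sketch, not part of the training module): running a single
# Alice training job against the default alice_config.py and env_config.py in the
# working directory, and reading back the summary metrics it returns. The prefix
# string is hypothetical and only affects the experiment/directory name.
#
# if __name__ == '__main__':
#     avg_steps, action_info, state_info, name = train_alice(exp_name_prefix='demo_')
#     print(name, avg_steps, action_info, state_info)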
os.chdir("..")
directory = os.getcwd() + '/results/' + experiment + '/'
r = pickle.load(open(directory + 'results.pkl', 'rb'))
values = r.values
action_probs = r.action_probs
action_kls = r.action_kls
lso = r.log_state_odds
state_goal_counts = r.state_goal_counts

# load env
env_config = imp.load_source('env_config', directory + 'env_config.py')
env_type, env_param, env_exp_name_ext = env_config.get_config()
if env_type == 'grid':
    env = TwoGoalGridWorld(shape=env_param.shape,
                           r_correct=env_param.r_correct,
                           r_incorrect=env_param.r_incorrect,
                           r_step=env_param.r_step,
                           r_wall=env_param.r_wall,
                           p_rand=env_param.p_rand,
                           goal_locs=env_param.goal_locs,
                           goal_dist=env_param.goal_dist)
else:
    raise ValueError('Invalid env.')

# figure sizes
FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
figure_sizes = FigureSizes(figure=(50, 25),
                           tick_label=40,
                           axis_label=50,
                           title=60)

# do the plots
k = 15
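# The plotting calls themselves are not preserved here; the block below is a sketch
# of what typically follows, mirroring the grid-world plotting section of
# train_alice (plot_value_map, plot_kl_map, plot_lso_map, plot_state_densities,
# print_policy). noshow=True writes the figures back into the results directory;
# change it for interactive viewing if the plotting helpers support that.
print('-' * k + 'VALUES' + '-' * k)
plot_value_map(values, action_probs, env, figure_sizes, noshow=True, directory=directory)
if action_kls is not None:
    print('-' * k + 'KLS' + '-' * k)
    plot_kl_map(action_kls, action_probs, env, figure_sizes, noshow=True, directory=directory)
if lso is not None:
    print('-' * k + 'LSOS' + '-' * k)
    plot_lso_map(lso, action_probs, env, figure_sizes, noshow=True, directory=directory)
print('-' * k + 'STATE DENSITIES' + '-' * k)
plot_state_densities(state_goal_counts, action_probs, env, figure_sizes, noshow=True, directory=directory)
print('-' * k + 'POLICY' + '-' * k)
print_policy(action_probs, env)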
def train_bob(bob_config_ext = '', exp_name_ext = '', exp_name_prefix = '', results_directory = None):

    if results_directory is None:
        results_directory = os.getcwd()+'/results/'

    # import bob
    config = importlib.import_module('bob_config'+bob_config_ext)
    agent_param, training_param, experiment_name, alice_experiment = config.get_config()
    print('Imported Bob.')

    # import alice
    alice_directory = results_directory+alice_experiment+'/'
    alice_config = imp.load_source('alice_config', alice_directory+'alice_config.py')
    alice_agent_param, alice_training_param, alice_experiment_name = alice_config.get_config()
    print('Imported Alice.')

    # import and init env
    env_config = imp.load_source('env_config', alice_directory+'env_config.py')
    env_param, env_exp_name_ext = env_config.get_config()
    experiment_name = experiment_name + env_exp_name_ext + exp_name_ext
    env = TwoGoalGridWorld(shape = env_param.shape,
                           r_correct = env_param.r_correct,
                           r_incorrect = env_param.r_incorrect,
                           r_step = env_param.r_step,
                           r_wall = env_param.r_wall,
                           p_rand = env_param.p_rand,
                           goal_locs = env_param.goal_locs,
                           goal_dist = env_param.goal_dist)
    print('Imported environment.')

    # run training, and if NaNs creep in, train again until they don't
    success = False
    while not success:

        # initialize alice and bob using configs
        tf.reset_default_graph()
        #global_step = tf.Variable(0, name = "global_step", trainable = False)
        with tf.variable_scope('alice'):
            alice = TabularREINFORCE(env,
                                     use_action_info = alice_agent_param.use_action_info,
                                     use_state_info = alice_agent_param.use_state_info)
        alice_saver = tf.train.Saver()
        with tf.variable_scope('bob'):
            bob = RNNObserver(env = env,
                              shared_layer_sizes = agent_param.shared_layer_sizes,
                              policy_layer_sizes = agent_param.policy_layer_sizes,
                              value_layer_sizes = agent_param.value_layer_sizes,
                              use_RNN = agent_param.use_RNN)
        saver = tf.train.Saver()
        print('Initialized Alice and Bob.')

        # run experiment
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            alice_saver.restore(sess, alice_directory+'alice.ckpt')
            print('Loaded trained Alice.')
            alice_stats, bob_stats, success = reinforce(env = env,
                                                        alice = alice,
                                                        bob = bob,
                                                        training_steps = training_param.training_steps,
                                                        learning_rate = training_param.learning_rate,
                                                        entropy_scale = training_param.entropy_scale,
                                                        value_scale = training_param.value_scale,
                                                        discount_factor = training_param.discount_factor,
                                                        max_episode_length = training_param.max_episode_length,
                                                        bob_goal_access = training_param.bob_goal_access)
            if success:
                print('Finished training.')
                # save session
                experiment_directory = exp_name_prefix+datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")+'_'+experiment_name+'/'
                directory = results_directory + experiment_directory
                print('Saving results in %s.' % directory)
                if not os.path.exists(directory+'bob/'):
                    os.makedirs(directory+'bob/')
                save_path = saver.save(sess, directory+'bob/bob.ckpt')
                print('Saved bob to %s.' % save_path)
            else:
                print('Unsuccessful run - restarting.')
                f = open('error.txt','a')
                d = datetime.datetime.now().strftime("%A, %B %d, %I:%M:%S %p")
                f.write("{}: experiment '{}' failed and reran\n".format(d, exp_name_prefix+experiment_name))
                f.close()

    # save experiment stats
    print('Building Alice stats.')
    alice_total_steps, alice_steps_per_reward = first_time_to(alice_stats.episode_lengths,
                                                              alice_stats.episode_rewards)
    a = Stats(episode_lengths = alice_stats.episode_lengths,
              episode_rewards = alice_stats.episode_rewards,
              episode_action_kl = alice_stats.episode_action_kl,
              episode_lso = alice_stats.episode_lso,
              state_goal_counts = alice_stats.state_goal_counts,
              steps_per_reward = alice_steps_per_reward,
              total_steps = alice_total_steps)
    print('Building Bob stats.')
    bob_total_steps, bob_steps_per_reward = first_time_to(bob_stats.episode_lengths,
                                                          bob_stats.episode_rewards)
    b = Stats(episode_lengths = bob_stats.episode_lengths,
              episode_rewards = bob_stats.episode_rewards,
              episode_action_kl = None,
              episode_lso = None,
              state_goal_counts = None,
              steps_per_reward = bob_steps_per_reward,
              total_steps = bob_total_steps)
    result = Result(alice = a, bob = b)
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(directory+'results.pkl', 'wb') as output:
        # copy to locally-defined Stats objects to make pickle happy
        pickle.dump(result, output, pickle.HIGHEST_PROTOCOL)
    print('Saved stats.')

    # copy config files to results directory to ensure experiment is repeatable
    copy(os.getcwd()+'/bob_config'+bob_config_ext+'.py', directory+'bob_config.py')
    copy(os.getcwd()+'/env_config.py', directory)
    copy(alice_directory+'alice_config.py', directory)
    print('Copied configs.')

    # copy the alice checkpoint that was used
    if not os.path.exists(directory+'alice/'):
        os.makedirs(directory+'alice/')
    for file in glob.glob(alice_directory+'alice.ckpt*'):
        copy(file, directory+'alice/')
    copy(alice_directory+'checkpoint', directory+'alice/')
    print('Copied Alice.')

    # plot experiment and save figures
    FigureSizes = namedtuple('FigureSizes', ['figure', 'tick_label', 'axis_label', 'title'])
    figure_sizes = FigureSizes(figure = (50,25),
                               tick_label = 40,
                               axis_label = 50,
                               title = 60)
    avg_steps_per_reward, avg_steps_per_reward_alice, action_info, state_info = plot_episode_stats(result,
                                                                                                   figure_sizes,
                                                                                                   noshow = True,
                                                                                                   directory = directory)
    print('Figures saved.')

    print('\nAll results saved in {}'.format(directory))
    return avg_steps_per_reward, avg_steps_per_reward_alice, action_info, state_info, experiment_name
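# Example invocation (a sketch): training Bob against a previously trained Alice.
# The Alice experiment to load is named inside bob_config.py (alice_experiment), so
# this assumes the bob_config.py in the working directory already points at a
# finished Alice run under ./results/. The prefix string is hypothetical.
#
# if __name__ == '__main__':
#     results = train_bob(exp_name_prefix='demo_')
#     print(results[-1])  # experiment name of the saved Bob run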