def main():
    """Run two-step Tree Backup several times and save the averaged results.

    Calls ``tree_backup_two_step(env, num_episodes)`` 5 times on a single
    ``CliffWalkingEnv`` instance, sums the two arrays it returns (reward and
    error), divides each sum by the number of runs and writes both averages
    to ``.npy`` files under the hard-coded results directory.
    """
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement used by this script.
    print("Tree Backup Two Step")

    env = CliffWalkingEnv()

    num_runs = 5
    num_episodes = 20
    step_sizes = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1])

    # Accumulators are (num_episodes, number of step sizes); the arrays
    # returned by tree_backup_two_step are assumed to match -- TODO confirm.
    grid_shape = (num_episodes, len(step_sizes))
    reward_total = np.zeros(shape=grid_shape)
    error_total = np.zeros(shape=grid_shape)

    for _ in range(num_runs):
        run_reward, run_error = tree_backup_two_step(env, num_episodes)
        reward_total = reward_total + run_reward
        error_total = error_total + run_error

    reward_mean = np.true_divide(reward_total, num_runs)
    error_mean = np.true_divide(error_total, num_runs)

    results_dir = (
        '/Users/Riashat/Documents/PhD_Research/BASIC_ALGORITHMS/'
        'My_Implementations/Project_652/Code/Tabular/Tree_Backup_Results/'
    )
    file_prefix = 'Tree Backup_Two_Step_'
    outputs = (
        ('Reward_Alpha_', reward_mean),
        ('Error_Alpha_', error_mean),
    )
    for suffix, averaged in outputs:
        np.save(results_dir + file_prefix + suffix + '.npy', averaged)

    env.close()
def main():
    """Run adaptive on-policy Q(sigma) repeatedly and save averaged results.

    Calls ``adaptive_q_sigma_on_policy(env, num_episodes)`` 10 times on one
    ``CliffWalkingEnv`` instance. Each call returns four arrays (reward per
    sigma, error per sigma, reward per sigma/alpha, error per sigma/alpha);
    each is summed across runs, divided by the number of runs and saved to
    its own ``.npy`` file under the hard-coded results directory.
    """
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement used by this script.
    print("Adaptive Q(sigma) On Policy")

    env = CliffWalkingEnv()

    num_runs = 10
    num_episodes = 2000
    step_sizes = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1])
    initial_sigmas = np.array([1, 0.75, 0.5, 0.25, 0])

    # Two accumulator layouts: one row per episode, or one row per sigma.
    # The arrays returned by adaptive_q_sigma_on_policy are assumed to match
    # these shapes -- TODO confirm.
    per_episode_shape = (num_episodes, len(initial_sigmas))
    per_sigma_shape = (len(initial_sigmas), len(step_sizes))

    reward_sigma_total = np.zeros(shape=per_episode_shape)
    reward_sigma_alpha_total = np.zeros(shape=per_sigma_shape)
    error_sigma_total = np.zeros(shape=per_episode_shape)
    error_sigma_alpha_total = np.zeros(shape=per_sigma_shape)

    for _ in range(num_runs):
        (run_reward_sigma,
         run_error_sigma,
         run_reward_sigma_alpha,
         run_error_sigma_alpha) = adaptive_q_sigma_on_policy(
            env, num_episodes)

        reward_sigma_total = reward_sigma_total + run_reward_sigma
        reward_sigma_alpha_total = (
            reward_sigma_alpha_total + run_reward_sigma_alpha)
        error_sigma_total = error_sigma_total + run_error_sigma
        error_sigma_alpha_total = (
            error_sigma_alpha_total + run_error_sigma_alpha)

    reward_sigma_mean = np.true_divide(reward_sigma_total, num_runs)
    reward_sigma_alpha_mean = np.true_divide(
        reward_sigma_alpha_total, num_runs)
    error_sigma_mean = np.true_divide(error_sigma_total, num_runs)
    error_sigma_alpha_mean = np.true_divide(
        error_sigma_alpha_total, num_runs)

    results_dir = (
        '/Users/Riashat/Documents/PhD_Research/BASIC_ALGORITHMS/'
        'My_Implementations/Project_652/Code/Tabular/'
        'Adaptive_OnPolicy_Q_Sigma_Results/'
    )
    file_prefix = 'Adaptive_On_Policy_Q_sigma'
    # NOTE: the reward-per-alpha file uses the bare 'Sigma_Alpha' suffix;
    # kept as-is so existing readers of these files keep working.
    outputs = (
        ('Reward_Sigma_', reward_sigma_mean),
        ('Sigma_Alpha', reward_sigma_alpha_mean),
        ('Error_Sigma_', error_sigma_mean),
        ('Error_Sigma_Alpha', error_sigma_alpha_mean),
    )
    for suffix, averaged in outputs:
        np.save(results_dir + file_prefix + suffix + '.npy', averaged)

    # plotting.plot_episode_stats(stats_tree_lambda)
    env.close()
def main():
    """Run tabular Tree Backup(lambda) repeatedly and save averaged results.

    Calls ``tree_backup_lambda(env, num_episodes)`` 10 times on one
    ``CliffWalkingEnv`` instance. Each call returns four arrays (reward per
    lambda, reward per lambda/alpha, error per lambda, error per
    lambda/alpha); each is summed across runs, divided by the number of runs
    and saved to its own ``.npy`` file under the hard-coded results directory.
    """
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement used by this script.
    print("Tree Backup(lambda)")

    env = CliffWalkingEnv()

    num_runs = 10
    num_episodes = 1000
    trace_decays = np.array(
        [0, 0.1, 0.15, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.975, 0.99, 1])
    step_sizes = np.array([0.1, 0.2, 0.4, 0.6, 0.8, 1])

    # Two accumulator layouts: one row per episode, or one row per lambda.
    # The arrays returned by tree_backup_lambda are assumed to match these
    # shapes -- TODO confirm.
    per_episode_shape = (num_episodes, len(trace_decays))
    per_lambda_shape = (len(trace_decays), len(step_sizes))

    reward_lambda_total = np.zeros(shape=per_episode_shape)
    lambda_alpha_total = np.zeros(shape=per_lambda_shape)
    error_lambda_total = np.zeros(shape=per_episode_shape)
    error_lambda_alpha_total = np.zeros(shape=per_lambda_shape)

    for _ in range(num_runs):
        (run_reward_lambda,
         run_lambda_alpha,
         run_error_lambda,
         run_error_lambda_alpha) = tree_backup_lambda(env, num_episodes)

        reward_lambda_total = reward_lambda_total + run_reward_lambda
        lambda_alpha_total = lambda_alpha_total + run_lambda_alpha
        error_lambda_total = error_lambda_total + run_error_lambda
        error_lambda_alpha_total = (
            error_lambda_alpha_total + run_error_lambda_alpha)

    reward_lambda_mean = np.true_divide(reward_lambda_total, num_runs)
    lambda_alpha_mean = np.true_divide(lambda_alpha_total, num_runs)
    error_lambda_mean = np.true_divide(error_lambda_total, num_runs)
    error_lambda_alpha_mean = np.true_divide(
        error_lambda_alpha_total, num_runs)

    results_dir = (
        '/Users/Riashat/Documents/PhD_Research/BASIC_ALGORITHMS/'
        'My_Implementations/Project_652/Code/Tabular/Tree_Backup_Results/'
    )
    file_prefix = 'Tree Backup(lambda)_'
    outputs = (
        ('Reward_Lambda_', reward_lambda_mean),
        ('Lambda_Alpha', lambda_alpha_mean),
        ('Error_Lambda_', error_lambda_mean),
        ('Error_Lambda_Alpha', error_lambda_alpha_mean),
    )
    for suffix, averaged in outputs:
        np.save(results_dir + file_prefix + suffix + '.npy', averaged)

    # plotting.plot_episode_stats(stats_tree_lambda)
    env.close()
def main():
    """Run Tree Backup(lambda) with a weight matrix and save averaged results.

    Allocates a zero weight matrix ``theta`` of shape
    ``(400, env.action_space.n)`` and calls
    ``tree_backup_lambda(env, theta, num_episodes)`` twice on one
    ``CliffWalkingEnv`` instance. Each call returns four arrays (reward per
    lambda, reward per lambda/alpha, error per lambda, error per
    lambda/alpha); each is summed across runs, divided by the number of runs
    and saved to its own ``.npy`` file under the hard-coded results directory.
    """
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement used by this script.
    print("Tree Backup(lambda)")

    env = CliffWalkingEnv()

    num_runs = 2
    num_episodes = 30

    # NOTE(review): the same theta object is handed to every run. If
    # tree_backup_lambda updates it in place, later runs start from the
    # previous run's weights rather than from zeros -- confirm intent.
    theta = np.zeros(shape=(400, env.action_space.n))

    trace_decays = np.array([0.1, 0.15, 0.2, 0.4, 0.6, 0.8, 0.9, 1])
    step_sizes = np.array([0.1, 0.2, 0.4, 0.5])

    # Two accumulator layouts: one row per episode, or one row per lambda.
    # The arrays returned by tree_backup_lambda are assumed to match these
    # shapes -- TODO confirm.
    per_episode_shape = (num_episodes, len(trace_decays))
    per_lambda_shape = (len(trace_decays), len(step_sizes))

    reward_lambda_total = np.zeros(shape=per_episode_shape)
    lambda_alpha_total = np.zeros(shape=per_lambda_shape)
    error_lambda_total = np.zeros(shape=per_episode_shape)
    error_lambda_alpha_total = np.zeros(shape=per_lambda_shape)

    for _ in range(num_runs):
        (run_reward_lambda,
         run_lambda_alpha,
         run_error_lambda,
         run_error_lambda_alpha) = tree_backup_lambda(
            env, theta, num_episodes)

        reward_lambda_total = reward_lambda_total + run_reward_lambda
        lambda_alpha_total = lambda_alpha_total + run_lambda_alpha
        error_lambda_total = error_lambda_total + run_error_lambda
        error_lambda_alpha_total = (
            error_lambda_alpha_total + run_error_lambda_alpha)

    reward_lambda_mean = np.true_divide(reward_lambda_total, num_runs)
    lambda_alpha_mean = np.true_divide(lambda_alpha_total, num_runs)
    error_lambda_mean = np.true_divide(error_lambda_total, num_runs)
    error_lambda_alpha_mean = np.true_divide(
        error_lambda_alpha_total, num_runs)

    results_dir = (
        '/Users/Riashat/Documents/PhD_Research/BASIC_ALGORITHMS/'
        'My_Implementations/Project_652/Code/Linear_Approximator/'
        'Eligibility_Traces/Accumulating_Traces/Cliff_Walking_Results/'
    )
    file_prefix = 'Tree Backup(lambda)_RBF_'
    outputs = (
        ('Reward_Lambda_', reward_lambda_mean),
        ('Lambda_Alpha', lambda_alpha_mean),
        ('Error_Lambda_', error_lambda_mean),
        ('Error_Lambda_Alpha', error_lambda_alpha_mean),
    )
    for suffix, averaged in outputs:
        np.save(results_dir + file_prefix + suffix + '.npy', averaged)

    # plotting.plot_episode_stats(stats_tree_lambda)
    env.close()
# Directory that receives one ``<name>.npy`` reward curve per algorithm.
_CLIFF_WALKING_RESULTS_DIR = (
    '/Users/Riashat/Documents/PhD_Research/BASIC_ALGORITHMS/'
    'My_Implementations/Project_652/Code/Tabular/CliffWalking_Results/'
)


def _run_and_save_rewards(title, algorithm, file_stem, num_episodes=2000,
                          smoothing_window=1, plot=False):
    """Run one algorithm on its own CliffWalkingEnv and save its rewards.

    Args:
        title: Text printed before the run starts.
        algorithm: Callable invoked as ``algorithm(env, num_episodes)``; its
            result must expose ``episode_rewards``.
        file_stem: File name (without ``.npy``) written inside
            ``_CLIFF_WALKING_RESULTS_DIR``.
        num_episodes: Number of episodes passed to ``algorithm``.
        smoothing_window: Rolling-mean window (also used as ``min_periods``)
            applied to the episode rewards before saving.
        plot: When true, also call ``plotting.plot_episode_stats`` on the
            returned stats after saving.
    """
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement used by this script.
    print(title)

    # Every run gets its own environment so no run ever touches an
    # environment that an earlier run has already closed.
    env = CliffWalkingEnv()
    try:
        stats = algorithm(env, num_episodes)
        smoothed_rewards = pd.Series(stats.episode_rewards).rolling(
            smoothing_window, min_periods=smoothing_window).mean()
        np.save(_CLIFF_WALKING_RESULTS_DIR + file_stem + '.npy',
                smoothed_rewards)
        if plot:
            plotting.plot_episode_stats(stats)
    finally:
        # Release the environment even if the algorithm or the save fails.
        env.close()


def main():
    """Run the enabled CliffWalking algorithms and save each reward curve.

    Each enabled algorithm is run for 2000 episodes on a freshly created
    ``CliffWalkingEnv``; its per-episode rewards (rolling mean, window 1) are
    written to ``<name>.npy`` in the CliffWalking results directory.

    The Q(sigma)(lambda) run now creates its own environment; it previously
    reused the environment that the preceding experiment had already closed.
    Disabled experiments are kept below as commented-out calls.
    """
    # --- Disabled one-step baselines -------------------------------------
    # _run_and_save_rewards("SARSA", sarsa, 'SARSA')
    # _run_and_save_rewards("Q Learning", q_learning, 'Q_Learning')
    # _run_and_save_rewards("Double Q Learning", double_q_learning,
    #                       'Double_Q_Learning')

    # --- n-step Tree Backup ----------------------------------------------
    _run_and_save_rewards("One Step Tree Backup (Expected SARSA)",
                          one_step_tree_backup, 'One_Step_Tree_Backup')
    _run_and_save_rewards("Two Step Tree Backup",
                          two_step_tree_backup, 'Two_Step_Tree_Backup')
    _run_and_save_rewards("Three Step Tree Backup",
                          three_step_tree_backup, 'Three_Step_Tree_Backup')

    # --- Q(sigma) ---------------------------------------------------------
    _run_and_save_rewards("Q(sigma) On Policy",
                          q_sigma_on_policy, 'Q_Sigma_On_Policy')
    _run_and_save_rewards("Q(sigma) Off Policy",
                          Q_Sigma_Off_Policy, 'Q_Sigma_Off_Policy')
    _run_and_save_rewards("Q(sigma) Off Policy 2 Step",
                          Q_Sigma_Off_Policy_2_Step,
                          'Q_Sigma_Off_Policy_2_Step')
    _run_and_save_rewards("Q(sigma) Off Policy 3 Step",
                          Q_Sigma_Off_Policy_3_Step,
                          'Q_Sigma_Off_Policy_3_Step')

    # --- Disabled eligibility-trace methods -------------------------------
    # _run_and_save_rewards("SARSA(lambda)", sarsa_lambda, 'Sarsa(lambda)')
    # _run_and_save_rewards("Watkins Q(lambda)", q_lambda_watkins,
    #                       'Watkins Q(lambda)')
    # _run_and_save_rewards("Naive Q(lambda)", q_lambda_naive,
    #                       'Naive Q(lambda)')
    # _run_and_save_rewards("Tree Backup(lambda)", tree_backup_lambda,
    #                       'Tree Backup(lambda)')

    # NOTE: marked by the original author as "DOES NOT WORK FULLY YET".
    # This is the only run that also plots its episode statistics.
    _run_and_save_rewards("Q(sigma)(lambda)", q_sigma_lambda,
                          'Q(sigma_lambda)', plot=True)