from gym import make
import gym_workflow.envs
from collections import defaultdict, OrderedDict
import numpy as np
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    # Experiment 1: Q-learning on the Montage-v1 workflow environment.
    env = make('Montage-v1')
    episodes = 100

    # Q Learning: returns the action-value table, per-episode stats, and
    # auxiliary run records; results are also appended to the CSV log file.
    Q, stats, records = TD.q_learning(
        env, episodes,
        discount_factor=0.7, epsilon=0.3, training_episode=0,
        log_file="exp1-cn-training-0-epi-{}-vm-10.csv".format(episodes),
    )

    # BUG FIX: the original wrote
    #     OrderedDict(sorted(Q.items()), key=lambda i: keyorder.index(i[0]))
    # which passes ``key=`` to the OrderedDict *constructor* rather than to
    # sorted(); that silently inserts a spurious 'key' -> <lambda> entry into
    # the mapping that would then be plotted (and ``keyorder`` is undefined,
    # so the lambda would raise NameError if it were ever invoked).
    # ``sorted(Q.items())`` already yields the intended state-key ordering.
    sQ = OrderedDict(sorted(Q.items()))

    plt.v1_plot_action_value(
        sQ, title="Experiment 1: Q-Value Overview - %s episodes" % episodes)
    plt.v1_plot_episode_stats(stats)
if __name__ == '__main__':
    # Phase-2 Experiment 1: Q-learning on Montage-v11
    # (no training episodes, maintain-all policy, terminal 20, M10/B20).
    env = make('Montage-v11')
    episodes = 100
    log_pre = "p2-exp1-epi-{}-train-0-maintain-all-terminal-20-M10-B20.csv".format(
        episodes)

    # Q Learning: action-value table, per-episode stats, and run records;
    # progress is appended to the CSV named by ``log_pre``.
    Q, stats, records = TD.q_learning(
        env, episodes,
        discount_factor=0.7, epsilon=0.3, training_episode=0,
        log_file=log_pre,
    )

    # BUG FIX: the original passed ``key=lambda i: keyorder.index(i[0])`` to
    # the OrderedDict *constructor* (not to sorted()), which inserts a bogus
    # 'key' -> <lambda> entry into the mapping that the plot would then see,
    # and references an undefined ``keyorder``.  ``sorted(Q.items())`` already
    # produces the intended ordering.
    sQ = OrderedDict(sorted(Q.items()))

    plt.plot_exp_2_action_value(
        sQ,
        title="Q-Value Overview - %s episodes" % episodes,
        opt_lower=8, opt_high=11, xlim=11, ylim=11)
    plt.plot_episode_stats(stats, smoothing_window=10)
# BUG FIX: this script called make('Montage-v12') without importing ``make``
# (NameError at runtime) and without importing ``gym_workflow.envs``, which
# registers the Montage environments with gym.  Both imports are added here,
# matching the sibling experiment scripts.
from gym import make
import gym_workflow.envs
from collections import defaultdict, OrderedDict
import numpy as np
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    # Phase-3 Experiment 1: Q-learning on Montage-v12 (100 VMs, B10),
    # publication run over 500 episodes.
    env = make('Montage-v12')
    episodes = 500

    # Q Learning: action-value table, per-episode stats, and run records;
    # progress is appended to the named CSV log file.
    Q, stats, records = TD.q_learning(
        env, episodes,
        discount_factor=0.7, epsilon=0.3, training_episode=0,
        log_file="p3-exp1-training-epi-{}-vm-100-v12-B10-publication.csv".format(
            episodes),
    )

    # Sort states so the Q-value overview plots in a stable key order.
    sQ = OrderedDict(sorted(Q.items()))
    plt.plot_exp_3_action_value(
        sQ, title="exp-4-v12-epi-{}-vm-100".format(episodes))
from gym import make
import gym_workflow.envs
from collections import defaultdict
import numpy as np
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    # SARSA experiment on the Montage-v4 workflow environment.
    env = make('Montage-v4')
    episodes = 1000

    # Run on-policy TD control; Q maps state -> per-action value estimates.
    Q, stats = TD.sarsa(env, episodes)

    # Collapse each state's action values to the greedy state value V(s) =
    # max_a Q(s, a) for the value-function surface plot.
    V = defaultdict(float,
                    {state: np.max(action_values)
                     for state, action_values in Q.items()})

    plt.plot_value_function(
        V, title="SARSA: Value Function representation - %s episodes" % episodes)
    print(Q)
    plt.plot_episode_stats(stats)