Code Example #1
from gym import make
import gym_workflow.envs
from collections import OrderedDict
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    env = make('Montage-v1')
    episodes = 100

    # Q Learning
    Q, stats, records = TD.q_learning(
        env,
        episodes,
        discount_factor=0.7,
        epsilon=0.3,
        training_episode=0,
        log_file="exp1-cn-training-0-epi-{}-vm-10.csv".format(episodes),
    )

    # Sort states so the action-value plot has a stable ordering.
    sQ = OrderedDict(sorted(Q.items()))

    # plt.overhead_visualization(
    #     records['overhead'], xlabel='Cycle', ylabel="Overhead(s)",
    #     title="Overhead(s) across episodes"
    # )

    plt.v1_plot_action_value(
        sQ, title="Experiment 1: Q-Value Overview - %s episodes" % episodes)

    plt.v1_plot_episode_stats(stats)
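
The TD.q_learning implementation itself is not listed here. For orientation, the sketch below shows a minimal tabular Q-learning loop with an epsilon-greedy policy, which is what the discount_factor and epsilon arguments above configure. Every name in the sketch is illustrative; it is not the actual agents.strategy.td API.

# Sketch only: NOT the agents.strategy.td implementation.
# Assumes a discrete action space and the classic 4-tuple gym step API.
from collections import defaultdict
import numpy as np

def q_learning_sketch(env, num_episodes, discount_factor=0.7, epsilon=0.3, alpha=0.5):
    Q = defaultdict(lambda: np.zeros(env.action_space.n))
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            # Epsilon-greedy action selection.
            if np.random.rand() < epsilon:
                action = env.action_space.sample()
            else:
                action = int(np.argmax(Q[state]))
            next_state, reward, done, _ = env.step(action)
            # Off-policy TD target: bootstrap on the best next-state action.
            td_target = reward + discount_factor * np.max(Q[next_state])
            Q[state][action] += alpha * (td_target - Q[state][action])
            state = next_state
    return Q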
Code Example #2
from gym import make
import gym_workflow.envs
from collections import OrderedDict
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    env = make('Montage-v11')
    episodes = 100

    log_pre = "p2-exp1-epi-{}-train-0-maintain-all-terminal-20-M10-B20.csv".format(
        episodes)
    # log_pre = "exp-3-epi-{}-train-10-maintain-all".format(episodes)
    # log_pre = "exp-3-epi-{}-train-0-maintain-smallest".format(episodes)
    # log_pre = "exp-3-epi-{}-train-0-maintain-largest".format(episodes)
    #

    # Q Learning
    Q, stats, records = TD.q_learning(
        env,
        episodes,
        discount_factor=0.7,
        epsilon=0.3,
        training_episode=0,
        log_file=log_pre,
    )

    # Sort states so the action-value plot has a stable ordering.
    sQ = OrderedDict(sorted(Q.items()))

    plt.plot_exp_2_action_value(
        sQ,
        title="Q-Value Overview - %s episodes" % episodes,
        opt_lower=8,
        opt_high=11,
        xlim=11,
        ylim=11,
    )

    plt.plot_episode_stats(stats, smoothing_window=10)
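
plot_episode_stats is called with smoothing_window=10 above, which ordinarily denotes a rolling mean over per-episode rewards. The sketch below shows that kind of smoothing; the episode_rewards input is an assumed array of per-episode returns, not a confirmed field of stats.

# Sketch of the rolling-mean smoothing a smoothing_window parameter
# usually implies; episode_rewards is an assumed input, not a field
# guaranteed by the stats object.
import numpy as np

def smooth(episode_rewards, window=10):
    # Moving average via convolution with a uniform kernel.
    kernel = np.ones(window) / window
    return np.convolve(episode_rewards, kernel, mode='valid')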
Code Example #3
from gym import make
import gym_workflow.envs
from collections import OrderedDict
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    # env = make('Montage-v8')
    env = make('Montage-v12')
    episodes = 500

    # Q Learning
    Q, stats, records = TD.q_learning(
        env,
        episodes,
        discount_factor=0.7,
        epsilon=0.3,
        training_episode=0,
        log_file="p3-exp1-training-epi-{}-vm-100-v12-B10-publication.csv".
        format(episodes),
    )

    sQ = OrderedDict(sorted(Q.items()))
    plt.plot_exp_3_action_value(
        sQ, title="exp-4-v12-epi-{}-vm-100".format(episodes))
    # plt.overall_records_visualization(records['benchmark'], xlabel="Cycle", ylabel="Benchmark Makespan(s)",
    #                                   title="Experiment 3: Benchmark Makespan over Cycle ({} Episodes)".format(
    #                                       episodes))
    #
    # plt.overall_records_visualization(
    #     records['makespan'], xlabel='Cycle', ylabel="Makespan(s)",
    #     title="Experiment 3: Makespan(s) across Cycle ({} Episodes)".format(episodes)
    # )
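
Example 3 plots the sorted action values directly. If a greedy policy is also wanted from the same table, it can be read off per state with an argmax; the sketch below assumes the defaultdict-of-arrays layout used in the Q-learning sketch above.

# Sketch: derive a greedy policy from a table mapping
# state -> array of per-action values (layout is assumed).
import numpy as np

def greedy_policy(Q):
    return {state: int(np.argmax(values)) for state, values in Q.items()}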
Code Example #4
from gym import make
import gym_workflow.envs
from collections import defaultdict
import numpy as np
from agents.strategy.td import TD
import agents.utils.plotting as plt

if __name__ == '__main__':
    env = make('Montage-v4')
    episodes = 1000

    Q, stats = TD.sarsa(env, episodes)
    # Collapse the action-value table into a state-value function
    # by taking the best action value in each state.
    V = defaultdict(float)
    for state, action_values in Q.items():
        V[state] = np.max(action_values)
    plt.plot_value_function(
        V,
        title="SARSA: Value Function representation - %s episodes" % episodes)
    print(Q)
    plt.plot_episode_stats(stats)
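
TD.sarsa's internals are likewise not listed. For contrast with the Q-learning sketch above, a minimal on-policy SARSA loop follows; it bootstraps on the action actually chosen next rather than the greedy one. All names are illustrative, not the real agents.strategy.td API.

# Sketch only: NOT the agents.strategy.td implementation.
from collections import defaultdict
import numpy as np

def sarsa_sketch(env, num_episodes, discount_factor=1.0, epsilon=0.1, alpha=0.5):
    Q = defaultdict(lambda: np.zeros(env.action_space.n))

    def epsilon_greedy(state):
        if np.random.rand() < epsilon:
            return env.action_space.sample()
        return int(np.argmax(Q[state]))

    for _ in range(num_episodes):
        state = env.reset()
        action = epsilon_greedy(state)
        done = False
        while not done:
            next_state, reward, done, _ = env.step(action)
            next_action = epsilon_greedy(next_state)
            # On-policy TD target: bootstrap on the action actually taken next.
            td_target = reward + discount_factor * Q[next_state][next_action]
            Q[state][action] += alpha * (td_target - Q[state][action])
            state, action = next_state, next_action
    return Q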