Example #1
import gym
import numpy as np
from numpy import savetxt

from QLearning import QLearning


def inner_execution(envDesc, a, g, ep, e):
    env = gym.make(envDesc).env
    print("current alpha -> {}, gamma -> {}, epsilon -> {}, episodes -> {}".
          format(a, g, ep, e))
    qlearn = QLearning(env,
                       alpha=a,
                       gamma=g,
                       epsilon=ep,
                       epsilon_min=0.001,
                       epsilon_dec=0.9999,
                       episodes=e)
    q_table = qlearn.train(
        "grid_data/q_table_{}_alpha_{}_gamma_{}_ep{}_e{}.csv".format(
            envDesc, a, g, ep, e), None)

    # Evaluate the greedy policy over 101 episodes, capping each at 200 steps
    rewards = 0
    for i in range(101):
        state = env.reset()
        train_done = False
        count = 0
        while (not train_done) and (count < 200):
            action = np.argmax(q_table[state])
            state, reward, train_done, _ = env.step(action)
            count += 1
            if reward == 1:
                rewards += 1

    # Persist the hyperparameters together with the achieved reward count
    r = np.array([a, g, ep, e, rewards])
    print(r)
    savetxt("grid_results/results_{}_alpha_{}_gamma_{}_ep{}_e{}".format(
        envDesc, a, g, ep, e),
            r,
            delimiter=',',
            newline="  ",
            fmt="%10.5f")
Example #2
# Variant that receives a ready-made env and records results on the instance;
# the self.results call below implies this is a method, so self is added here
def inner_execution(self, env, envDesc, a, g, ep, e):
    print("current alpha -> {}, gamma -> {}, epsilon -> {}, episodes -> {}".
          format(a, g, ep, e))
    qlearn = QLearning(env,
                       alpha=a,
                       gamma=g,
                       epsilon=ep,
                       epsilon_min=0.001,
                       epsilon_dec=0.9999,
                       episodes=e)
    q_table = qlearn.train(
        "grid_data/q_table_{}_alpha_{}_gamma_{}_ep{}_e{}.csv".format(
            envDesc, a, g, ep, e),
        "grid_results/actions_{}_alpha_{}_gamma_{}_ep{}_e{}".format(
            envDesc, a, g, ep, e))

    # Evaluate the greedy policy over 101 episodes, capping each at 200 steps
    rewards = 0
    for i in range(101):
        state = env.reset()
        train_done = False
        count = 0
        while (not train_done) and (count < 200):
            action = np.argmax(q_table[state])
            state, reward, train_done, _ = env.step(action)
            count += 1
            if reward == 1:
                rewards += 1

    self.results.append([a, g, ep, e, rewards])
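
Since this variant appends to self.results, it presumably lives inside a class; a minimal sketch of the enclosing class under that assumption (the class name is hypothetical, only the results attribute is implied by the source):

class GridSearch:
    """Hypothetical enclosing class for inner_execution above."""

    def __init__(self):
        # one [alpha, gamma, epsilon, episodes, rewards] row per configuration
        self.results = []

    # inner_execution from Example #2 would be defined as a method here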
Example #3
# Implement Q-learning and use it to solve the CartPole environment
import gym

# Source: https://github.com/JoeSnow7/Reinforcement-Learning/blob/master/Cartpole%20Q-learning.ipynb

# We define a class to contain the learning algorithm
from QLearning import QLearning

env = gym.make("CartPole-v0")
agent = QLearning(env)
agent.train()
agent.run()
Example #4
# Assumes LearningPolicy, QLearning, and AgentManager come from the project's
# own modules, and that training_mode, map, save_name, and checkpoint_file
# are defined earlier in the script.
# Have a look at LearningPolicy.py for other policies.
epsilon_policy = LearningPolicy.exponentially_annealed_epsilon(1 / 10000, 0.0)
epsilon_policy_2 = LearningPolicy.linear_annealed_epsilon(1., 0.1, 100)

alpha1 = 0.2  # alternative learning rate (unused below)
alpha2 = 0.1

hyperparameters = {"alpha": alpha2, "discount": 0.99}

# Please note: Numerous other settings can be adjusted in settings.py

if training_mode:
    q = QLearning(epsilon_policy=epsilon_policy_2,
                  map_name=map,
                  hyperparameters=hyperparameters,
                  save_name=save_name)
    while True:
        q.train()

else:
    q = QLearning(epsilon_policy=LearningPolicy.constant_epsilon(0),
                  map_name=map)

    if checkpoint_file is None:
        raise Exception("Please specify the checkpoint file path!")

    q_values = AgentManager.load_q_values(checkpoint_file)

    while True:
        q.test(q_values=q_values)
Example #5
import gym
import matplotlib.pyplot as plt
import numpy as np

from numpy import loadtxt
from QLearning import QLearning

def stateNumber(state):
    # Flatten the Blackjack observation tuple (player sum, dealer card,
    # usable ace) into a single q-table row index
    (x, y, z) = state
    y = y * 32
    z = z * 352
    return x + y + z

env = gym.make('Blackjack-v0')
for i in [0.01]:
    for g in [0.000001,0.00001,0.0001,0.001,0.01]:
        for epi in [600000,700000,800000]:
            qlearn = QLearning(env, alpha=i, gamma=g, epsilon=0.9,epsilon_min=0.01, epsilon_dec=0.99, episodes=epi)
            q_table = qlearn.train('data/q-table-blackjack.csv', 'results/blackjack')
#q_table = loadtxt('data/q-table-blackjack.csv', delimiter=',')

#state = env.reset()
#print(state)
#state = stateNumber(state)
#done = False
#
#while not done:
#    action = np.argmax(q_table[state])
#    state, reward, done, info = env.step(action)
#    print(action)
#    print(state)
#    state = stateNumber(state)
Example #6
# (Leftover commented-out frame-replay printing from a larger script)
#         #print(frame['frame'].getvalue())
#         print(f"Timestep: {i + 1}")
#         print(f"State: {frame['state']}")
#         print(f"Action: {frame['action']}")
#         print(f"Reward: {frame['reward']}")
#         sleep(.1)

import gym
import numpy as np

from QLearning import QLearning

env = gym.make('Roulette-v0').env
#q_table = loadtxt('data/q-table-roulette.csv', delimiter=',')

# ~2600 loss - stable
qlearn = QLearning(env,
                   alpha=0.001,
                   gamma=0.001,
                   epsilon=0.9,
                   epsilon_min=0.001,
                   epsilon_dec=0.9999,
                   episodes=1000000)

# 500-1000 loss - plays like a real player
#qlearn = QLearning(env, alpha=0.001, gamma=0.001, epsilon=0.9,
#                   epsilon_min=0.1, epsilon_dec=0.7, episodes=1000000)
q_table = qlearn.train('data/q-table-roulette.csv', None)

#q_table = loadtxt('data/q-table-roulette.csv', delimiter=',')

# Roll out the greedy policy once and accumulate the reward
state = env.reset()
done = False
rewards = 0
actions = 0

while not done:
    action = np.argmax(q_table[state])  # greedy action for the current state
    state, reward, done, info = env.step(action)
    actions += 1

    rewards += reward
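
One might finish by reporting the rollout totals; a small illustrative addition:

print("actions taken: {}, total reward: {}".format(actions, rewards))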