Example #1
import multiprocessing as multi
import pickle
from copy import deepcopy

from EZ21 import EZ21
from monte_hall_2 import MonteCarlo
from sarsa import SARSA
from q_learn import QLearn


def main():
    nProcess = multi.cpu_count()
    name = ["MCSteps", "SSteps", "Qsteps"]  # Change this!
    algQs = list()
    algrews = list()
    steps = [250000] * 4
    for i, agent in enumerate([MonteCarlo(env_in=EZ21()), SARSA(env_in=EZ21()), QLearn(env_in=EZ21())]):
        print(name[i])
        Qs = list()
        run_sum = 0
        for eps in steps:
            run_sum += eps
            print(run_sum)
            agent.n = eps                     # train for another block of episodes
            agent.iter_opt()
            Qs.append(deepcopy(agent.Q))      # snapshot Q after each block
        algQs.append(Qs)                      # keep this agent's snapshots
        # Evaluate every snapshot greedily, one pool worker per snapshot
        # (play_rounds is defined in Example #2)
        with multi.Pool(nProcess) as pool:
            algrews.append(pool.map(play_rounds, algQs[i]))

        with open(name[i] + "_algQs", 'wb') as myfile:
            pickle.dump(algQs, myfile)
        with open(name[i] + "_algrews", 'wb') as myfile:
            pickle.dump(algrews, myfile)
    return
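Because multi.Pool is created inside main(), the script needs the usual entry-point guard when it is launched on Windows (which the C:\ paths elsewhere in this listing suggest). A one-line addition, assuming nothing else lives in the module:

if __name__ == '__main__':
    main()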
Example #2
def play_rounds(Q_in):
    """Play 250,000 rounds greedily under the supplied Q table and return the reward of each round."""
    env = EZ21()
    results = list()
    for _ in range(250000):
        state = tuple(env.reset())
        while True:
            act = greedy_pol(Q_in, state)
            _state, rew, term = env.step(act)  # holding the next state in a buffer variable
            if term:
                results.append(rew)
                break
            state = tuple(_state)
    return results
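greedy_pol is called above but never shown in this listing. One possible shape, assuming Q_in maps a state tuple to an array of per-action values (the real structure lives in the agent classes, which are not included here):

import numpy as np

def greedy_pol(Q_in, state):
    # Pick the action with the highest estimated value in this state.
    return int(np.argmax(Q_in[state]))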
Example #3
import pickle
import seaborn as sns
import pandas as pd
from decimal import Decimal
from EZ21 import EZ21
from monte_hall_2 import MonteCarlo
from sarsa import SARSA
from q_learn import QLearn
from matplotlib.lines import Line2D

with open(r'C:\Source_files\Python\Pantry\MCQs', "rb") as f:
    MQs = pickle.load(f)
with open(r'C:\Source_files\Python\Pantry\SARSA_Qs', "rb") as f:
    SARSAQs = pickle.load(f)
with open(r'C:\Source_files\Python\BBB\QLong_Qs', "rb") as f:
    QLearnQs = pickle.load(f)

Qs = zip(MQs, SARSAQs, QLearnQs)
MC = MonteCarlo(env_in=EZ21())
SRS = SARSA(env_in=EZ21())
QL = QLearn(env_in=EZ21())
MCrews = list()
SARSArews = list()
QRews = list()
for j, (M, S, L) in enumerate(Qs):
    if j >= 50:
        break
    print(j)
    MCrews.append([])
    SARSArews.append([])
    QRews.append([])
    MC.reset(Q_in=M)
    SRS.reset(Q_in=S)
    QL.reset(Q_in=L)
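The snippet ends right after the agents are re-seeded with the loaded Q tables and before the reward lists are filled. A minimal sketch of the evaluation step, assuming the env and greedy_pol members used in Examples #4 and #8 (the helper name and the 1,000-round count are placeholders):

def greedy_rewards(agent, rounds=1000):
    # Play `rounds` greedy episodes with the agent's current Q and collect the terminal rewards.
    rewards = []
    for _ in range(rounds):
        state = agent.env.reset()
        while True:
            state, rew, term = agent.env.step(agent.greedy_pol(tuple(state)))
            if term:
                rewards.append(rew)
                break
    return rewards

# e.g., inside the loop above:
#     MCrews[j] = greedy_rewards(MC)
#     SARSArews[j] = greedy_rewards(SRS)
#     QRews[j] = greedy_rewards(QL)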
Example #4
import numpy as np
import matplotlib.pyplot as plt
import pickle
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from EZ21 import EZ21
from monte_hall_2 import MonteCarlo
import time

method = "MC"

with open(r'C:\Source_files\Python\BBB\MC100000_Qs', "rb") as f:
    Qs = pickle.load(f)
with open(r'C:\Source_files\Python\Pantry\MCQs', "rb") as f:
    Qs += tuple(pickle.load(f))

s1 = np.arange(10) + 1
s2 = np.arange(21) + 1
ss1, ss2 = np.meshgrid(s1, s2, indexing='ij')

EZ = EZ21()
MC = MonteCarlo(env_in=EZ)
fig = plt.figure(figsize=(10, 6))
V_MCs = list()
Pols = list()
Pol = []
V_MC = []
for j, Q in enumerate(Qs):
    MC.reset(Q)
    V_MC = np.zeros([10, 21])
    Pol = np.zeros([10, 21])
    for row in range(10):
        for col in range(21):
            V_MC[row, col] = MC.get_V((row + 1, col + 1))
            Pol[row, col] = MC.greedy_pol((row + 1, col + 1))
    V_MCs.append(V_MC)
    Pols.append(Pol)  # keep the greedy-policy grid alongside the value grid
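The listing cuts off before anything is drawn. Given the cm/Axes3D imports and the ss1, ss2 meshgrid prepared above, a minimal surface plot of the last value grid might look like the following (the axis labels assume the usual Easy21 state of dealer card x player sum):

ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(ss1, ss2, V_MCs[-1], cmap=cm.coolwarm)
ax.set_xlabel("Dealer card")   # s1 axis, 1-10
ax.set_ylabel("Player sum")    # s2 axis, 1-21
ax.set_zlabel("V")
plt.show()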
Example #5
def continue_Q(Q_in):
    # Only the first pool worker announces itself, to keep the console readable
    if "1" in multi.current_process().name:
        print('%s began working' % multi.current_process().name)
    # Resume Q-learning from the supplied Q table for another 100,000 episodes
    agent = QLearn(Q_in=Q_in, env_in=EZ21(), n_in=100000)
    return agent.iter_opt(), agent.Q
Example #6
def Monte(cnstnts=(None, None)):
    # cnstnts is unused here; it only lets pool.map hand the worker a dummy argument
    if "1" in multi.current_process().name:
        print('%s began working' % multi.current_process().name)
    agent = MonteCarlo(env_in=EZ21(), n_in=100000)
    return agent.iter_opt(), agent.Q
Example #7
def On_Pol(cnstnts=(None, None)):
    # On-policy (SARSA) worker; cnstnts is forwarded as the agent's cnst_par
    if "1" in multi.current_process().name:
        print('%s began working' % multi.current_process().name)
    agent = SARSA(env_in=EZ21(), n_in=80000, cnst_par=cnstnts)
    return agent.iter_opt(), agent.Q
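Examples #5-#7 are worker functions meant to be fanned out over a process pool, as in Example #1. A minimal dispatch sketch, assuming multiprocessing is imported as multi and pickle is available (the worker count, run count and output filename are placeholders):

if __name__ == '__main__':
    with multi.Pool(multi.cpu_count()) as pool:
        results = pool.map(Monte, [(None, None)] * 8)  # eight independent Monte Carlo runs
    Qs = [Q for _, Q in results]                       # keep only the learned Q tables
    with open("MC_Qs", 'wb') as f:
        pickle.dump(Qs, f)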
Example #8
import numpy as np
import matplotlib.pyplot as plt
import pickle
from monte_hall_2 import MonteCarlo
from EZ21 import EZ21
import seaborn as sns
import pandas as pd
from decimal import Decimal
from sarsa import SARSA
from q_learn import QLearn
from matplotlib.lines import Line2D

with open(r'C:\Source_files\Python\BBB\Box_data', "rb") as f:
    data = pickle.load(f)

MC = MonteCarlo(env_in=EZ21())
naive_rews = list()
# Baseline: 20,000 rounds played greedily with a freshly constructed (untrained) agent
for _ in range(20000):
    state = MC.env.reset()
    while True:
        state, rew, term = MC.env.step(MC.greedy_pol(tuple(state)))
        if term:
            naive_rews.append(rew)
            break

legend_elements = [
    Line2D([0], [0],
           marker='o',
           ls="None",
           alpha=0.8,
           label="Trained Policies",