def main():
    """Train MC, SARSA, and Q-learning agents on EZ21, snapshot each agent's
    Q table at fixed training checkpoints, evaluate every snapshot greedily in
    a process pool, and pickle both the snapshots and the evaluation rewards.
    """
    nProcess = multi.cpu_count()
    name = ["MCSteps", "SSteps", "Qsteps"]  # Change this!
    algQs = list()    # per-algorithm list of Q-table snapshots
    algrews = list()  # per-algorithm list of per-snapshot reward lists
    steps = [250000] * 4  # four checkpoints of 250k training episodes each
    for i, agent in enumerate([MonteCarlo(env_in=EZ21()),
                               SARSA(env_in=EZ21()),
                               QLearn(env_in=EZ21())]):
        print(name[i])
        Qs = list()
        run_sum = 0
        for eps in steps:
            run_sum += eps
            print(run_sum)  # cumulative training episodes so far
            agent.n = eps
            agent.iter_opt()
            Qs.append(deepcopy(agent.Q))  # snapshot the Q table at this checkpoint
        # BUG FIX: the snapshots were never stored in algQs, so algQs[i] below
        # raised IndexError on the first iteration; store them before evaluating.
        algQs.append(Qs)
        with multi.Pool(nProcess) as pool:
            # BUG FIX: an empty list was previously appended to algrews in
            # addition to this append, leaving a stray [] entry per algorithm.
            algrews.append(pool.map(play_rounds, algQs[i]))
        # Persist progress after each algorithm so a crash loses at most one run.
        with open(name[i] + "_algQs", 'wb') as myfile:
            pickle.dump(algQs, myfile)
        with open(name[i] + "_algrews", 'wb') as myfile:
            pickle.dump(algrews, myfile)
    return
def play_rounds(Q_in, n_rounds=250000):
    """Play greedy episodes of EZ21 under the given Q table.

    Parameters
    ----------
    Q_in : Q table consumed by ``greedy_pol`` (project-defined structure).
    n_rounds : int, default 250000
        Number of episodes to play. Generalized from the previously
        hard-coded constant; the default preserves the original behavior.

    Returns
    -------
    list
        The terminal reward of each episode, one entry per round.
    """
    env = EZ21()
    results = list()
    for _ in range(n_rounds):
        state = tuple(env.reset())
        while True:
            act = greedy_pol(Q_in, state)
            _state, rew, term = env.step(act)  # Holding next state in buffer variable
            if term:
                # Episode over: only the terminal reward is recorded.
                results.append(rew)
                break
            state = tuple(_state)
    return results
import seaborn as sns
import pandas as pd
from decimal import Decimal
from sarsa import SARSA
from q_learn import QLearn
from matplotlib.lines import Line2D

# Load previously pickled Q-table snapshots for the three algorithms.
with open(r'C:\Source_files\Python\Pantry\MCQs', "rb") as f:
    MQs = pickle.load(f)
with open(r'C:\Source_files\Python\Pantry\SARSA_Qs', "rb") as f:
    SARSAQs = pickle.load(f)
with open(r'C:\Source_files\Python\BBB\QLong_Qs', "rb") as f:
    QLearnQs = pickle.load(f)
# Iterate the three snapshot collections in lockstep.
Qs = zip(MQs, SARSAQs, QLearnQs)
# One agent per algorithm; each gets reset to a snapshot inside the loop.
MC = MonteCarlo(env_in=EZ21())
SRS = SARSA(env_in=EZ21())
QL = QLearn(env_in=EZ21())
MCrews = list()
SARSArews = list()
QRews = list()
# Evaluate at most the first 50 snapshot triples.
for j, (M, S, L) in enumerate(Qs):
    if j >= 50:
        break
    print(j)  # progress indicator
    MCrews.append([])
    SARSArews.append([])
    QRews.append([])
    MC.reset(Q_in=M)
    SRS.reset(Q_in=S)
    QL.reset(Q_in=L)
    # NOTE(review): the per-snapshot reward lists are appended empty and never
    # filled within this chunk — the loop body presumably continues past the
    # visible source; confirm against the full file.
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import time

method = "MC"
# Load Monte-Carlo Q-table snapshots from two pickle files and concatenate them.
with open(r'C:\Source_files\Python\BBB\MC100000_Qs', "rb") as f:
    Qs = pickle.load(f)
with open(r'C:\Source_files\Python\Pantry\MCQs', "rb") as f:
    Qs += tuple(pickle.load(f))
# 10x21 state grid over the two state components, both 1-based.
# NOTE(review): axis semantics inferred from grid sizes — confirm against EZ21.
s1 = np.arange(10) + 1
s2 = np.arange(21) + 1
ss1, ss2 = np.meshgrid(s1, s2, indexing='ij')
EZ = EZ21()
MC = MonteCarlo(env_in=EZ)
fig = plt.figure(figsize=(10, 6))
V_MCs = list()
Pols = list()
Pol = []
V_MC = []
# For each Q snapshot, tabulate the state-value function and greedy policy
# over the full state grid.
for j, Q in enumerate(Qs):
    MC.reset(Q)
    V_MC = np.zeros([10, 21])
    Pol = np.zeros([10, 21])
    for row in range(10):
        for col in range(21):
            # States are 1-based while array indices are 0-based.
            V_MC[row, col] = MC.get_V((row + 1, col + 1))
            Pol[row, col] = MC.greedy_pol((row + 1, col + 1))
    V_MCs.append(V_MC)
    # NOTE(review): Pol is computed but Pols is never appended within this
    # chunk — the loop body presumably continues past the visible source.
def continue_Q(Q_in):
    """Resume Q-learning from an existing Q table for 100000 more episodes.

    Intended as a multiprocessing pool worker; returns the result of
    ``iter_opt()`` together with the final Q table.
    """
    proc_name = multi.current_process().name
    # Only the worker whose name contains "1" announces itself, to keep
    # pool output from being flooded.
    if "1" in proc_name:
        print(('%s began working' % proc_name))
    agent = QLearn(Q_in=Q_in, env_in=EZ21(), n_in=100000)
    return agent.iter_opt(), agent.Q
def Monte(cnstnts=(None, None)):
    """Train a fresh Monte-Carlo agent on EZ21 for 100000 episodes.

    ``cnstnts`` is accepted but unused — presumably kept so this worker
    shares a signature with the other pool workers; confirm with callers.
    Returns the result of ``iter_opt()`` and the final Q table.
    """
    proc_name = multi.current_process().name
    # Only the worker whose name contains "1" announces itself.
    if "1" in proc_name:
        print(('%s began working' % proc_name))
    agent = MonteCarlo(env_in=EZ21(), n_in=100000)
    return agent.iter_opt(), agent.Q
def On_Pol(cnstnts=(None, None)):
    """Train a fresh SARSA agent on EZ21 for 80000 episodes.

    ``cnstnts`` is forwarded to the agent as its constant-parameter pair.
    Returns the result of ``iter_opt()`` and the final Q table.
    """
    proc_name = multi.current_process().name
    # Only the worker whose name contains "1" announces itself.
    if "1" in proc_name:
        print(('%s began working' % proc_name))
    agent = SARSA(env_in=EZ21(), n_in=80000, cnst_par=cnstnts)
    return agent.iter_opt(), agent.Q
import numpy as np
import matplotlib.pyplot as plt
import pickle
from monte_hall_2 import MonteCarlo
from EZ21 import EZ21
import seaborn as sns
import pandas as pd
from decimal import Decimal
from sarsa import SARSA
from q_learn import QLearn
from matplotlib.lines import Line2D

# Load reward data pickled by an earlier run.
with open(r'C:\Source_files\Python\BBB\Box_data', "rb") as f:
    data = pickle.load(f)
MC = MonteCarlo(env_in=EZ21())
# Baseline: play 20000 episodes with the agent's current (untrained) greedy
# policy, recording the terminal reward of each episode.
naive_rews = list()
for i in range(20000):
    state = MC.env.reset()
    while True:
        state, rew, term = MC.env.step(MC.greedy_pol(tuple(state)))
        if term:
            naive_rews.append(rew)
            break
# NOTE(review): the legend_elements list literal below is truncated in this
# chunk — the remainder of the expression continues past the visible source.
legend_elements = [
    Line2D([0], [0], marker='o', ls="None", alpha=0.8, label="Trained Policies",