def run_agent_Q_RMS_param(num_runs, num_episodes, discount, step_size, step=1, agent_type="Sarsa"):
    """Run an n-step agent on the 19-state random walk and return the average
    Q-value RMS error over all episodes and runs.

    agent_type selects the algorithm: "Sarsa", "ExpSarsa", "TreeBackup" or "QSigma".
    """
    mdp = RandomWalk(19, -1)
    start_state = mdp.init()
    # Ground-truth action values for the non-terminal states.
    gt_v = np.asarray(mdp.Q_equiprobable(discount)[1:-1])
    # Per-episode RMS error and the running total over runs.
    rms_err = np.zeros(num_episodes)
    sum_rms_err = 0.0
    # Instantiate the requested n-step agent.
    print("Starting agent {}-step {}".format(step, agent_type))
    kind = agent_type.lower()
    if kind == "sarsa":
        agent = Sarsa(mdp, start_state, step)
    elif kind == "expsarsa":
        agent = ExpSARSA(mdp, start_state, step)
    elif kind == "treebackup":
        agent = TreeBackup(mdp, start_state, step)
    elif kind == "qsigma":
        agent = QSigma(mdp, 0.5, start_state, step)
    else:
        raise Exception("Wrong type of agent")
    for _ in range(num_runs):
        for episode_idx in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            diff = np.asarray(agent.Q[1:-1]) - gt_v
            rms_err[episode_idx] = np.sqrt(np.mean(np.square(diff)))
        sum_rms_err += np.sum(rms_err)
        # Reset Q before starting the next independent run.
        agent.reset_Q()
    # Averaged over num_runs and num_episodes.
    return sum_rms_err / (num_runs * num_episodes)
def run_agent_RMS_value(num_runs, num_episodes, discount, step_size, step=1):
    """Run an n-step SARSA agent and return the per-episode RMS error of the
    state values, averaged over num_runs independent runs.

    Index 0 of the returned array is the error of the initial value estimate.
    """
    mdp = RandomWalk(19, -1)
    start_state = mdp.init()
    # Ground-truth state values for the non-terminal states.
    gt_v = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # All states start with value 0.5.
    init_v = np.asarray([0.5] * mdp.num_states())[1:-1]
    rms_err = np.zeros(num_episodes + 1)
    sum_rms_err = np.zeros(num_episodes + 1)
    rms_err[0] = np.sqrt(np.mean(np.square(init_v - gt_v)))
    # n-step SARSA agent shared across runs (Q reset between runs).
    agent = Sarsa(mdp, start_state, step)
    for _ in range(num_runs):
        for episode_idx in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            diff = np.asarray(agent.Q_to_value()[1:-1]) - gt_v
            rms_err[episode_idx + 1] = np.sqrt(np.mean(np.square(diff)))
        sum_rms_err += rms_err
        # Reset Q before the next independent run.
        agent.reset_Q()
    # Averaged over num_runs.
    return sum_rms_err / num_runs
def print_value():
    """Print the RMS error between the initial value guess (0.5 everywhere)
    and the true undiscounted state values of the 19-state random walk."""
    mdp = RandomWalk(19, 1)
    # True state values for the non-terminal states.
    true_v = np.asarray(mdp.value_equiprobable(1.0)[1:-1])
    # Initial estimate: 0.5 for every state.
    start_v = np.asarray([0.5] * mdp.num_states())[1:-1]
    err = np.sqrt(np.mean(np.square(start_v - true_v)))
    print("RMS error is ", err)
def main():
    """Generate one random walk and plot it as a thick line with hidden axes."""
    walk = RandomWalk()
    walk.fill_walk()
    fig, ax = plt.subplots()
    ax.plot(walk.x_values, walk.y_values, linewidth=5)
    # Hide both axes for a cleaner picture.
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)
    plt.show()
def run_agent_value(num_episodes, discount, step_size, step=1):
    """Run an n-step SARSA agent for num_episodes and return the estimated
    state values.

    Parameters:
        num_episodes: number of training episodes to run.
        discount: discount factor gamma.
        step_size: learning-rate alpha.
        step: the n of n-step SARSA (default 1).

    Returns the agent's Q values converted to state values.
    """
    mdp = RandomWalk(19)
    s = mdp.init()
    # Create the n-step SARSA agent.  (The original had a no-op
    # `step = step` self-assignment here; it has been removed.)
    agent = Sarsa(mdp, s, step)
    for i in range(num_episodes):
        agent.episode(discount, step_size)
        agent.init()
    return agent.Q_to_value()
def benchmark(alg_class, title, fn, sub=3):
    """Run the Figure-12.3 random-walk parameter study for `alg_class` and
    save the resulting plot to file `fn`.

    Parameters:
        alg_class: algorithm class instantiated with the state-aggregation
            value function below — presumably a TD(lambda)-family class with
            this constructor signature; TODO confirm against its definition.
        title: figure suptitle.
        fn: filename passed to save_plot.
        sub: subsampling factor forwarded to run_random_walks.
    """
    fig, ax = plt.subplots()
    fig.suptitle(title, fontsize=BIG_FONT)
    fig.set_size_inches(20, 14)
    # State-aggregation value estimate and its gradient, with the number of
    # states fixed to FIG_12_3_N_ST.
    def vhat(s, w):
        return vhat_st_agg(s, w, FIG_12_3_N_ST)
    def nab_vhat(s, w):
        return nab_vhat_st_agg(s, w, FIG_12_3_N_ST)
    alg = alg_class(RandomWalk(), None, FIG_12_3_N_ST, None, vhat, nab_vhat, FIG_12_3_G)
    xticks, yticks = np.linspace(0, 1, 6), np.linspace(0.25, 0.55, 7)
    # Ticks are labelled with their first three characters (e.g. "0.2").
    def short_str(x):
        return str(x)[:3]
    xnames, ynames = map(short_str, xticks), map(short_str, yticks)
    run_random_walks(ax, alg, FIG_12_3_LAM_L, FIG_12_3_N_EP, FIG_12_3_N_RUNS, sub)
    plot_figure(ax, '', xticks, xnames, 'alpha', yticks, ynames,
                (f'Average\nRMS error\n({FIG_12_3_N_ST} states,\n ' +
                 f'{FIG_12_3_N_EP} episodes)'),
                font=MED_FONT, labelpad=40, loc='upper right')
    save_plot(fn, dpi=100)
    plt.show()
def fig_9_2():
    """Reproduce Figure 9.2: semi-gradient TD(0) value estimate (left) and an
    n-step semi-gradient parameter study (right) on the random walk."""
    fig = plt.figure()
    fig.suptitle('Figure 9.2')
    env = RandomWalk()
    pi = {(EMPTY_MOVE, s): 1 for s in env.states}
    true_vals = get_true_vals(env, pi)
    # Left panel: semi-gradient TD(0) with state aggregation.
    semi_grad_td = SemiGradientTD0(env, FIG_9_2_ALP, FIG_9_2_W_DIM_L)
    semi_grad_td.seed(0)
    semi_grad_td.pol_eva(pi, vhat_st_agg, nab_vhat_st_agg, FIG_9_2_N_EP_L, FIG_9_2_G)
    est_vals = [vhat_st_agg(s, semi_grad_td.w) for s in env.states][:-1]
    # add_subplot takes an integer code; string specs like '121' were
    # deprecated in Matplotlib 3.3 and removed in 3.5.
    ax1 = fig.add_subplot(121)
    ax1.plot(est_vals, 'b', label='Approximate TD value vhat')
    ax1.plot(true_vals, 'r', label='True value v_pi')
    plot_figure(ax1, '', [0, 999], [1, 1000], 'State', [-1, 0, 1], [-1, 0, 1],
                '\n\nValue\nScale')
    # Right panel: RMS error of n-step semi-gradient TD as a function of alpha.
    nstep_semi_grad = nStepSemiGrad(env, None, FIG_9_2_W_DIM_R, FIG_9_2_G, 0)
    ax2 = fig.add_subplot(122)
    param_study(ax2, nstep_semi_grad, pi, vhat_st_agg, nab_vhat_st_agg,
                FIG_9_2_N_EP_R, FIG_9_2_N_RUNS_R, true_vals=true_vals,
                max_n=FIG_9_2_MAX_N, gamma=FIG_9_2_G)
    plt.legend()
    fig.set_size_inches(20, 14)
    save_plot('fig9.2', dpi=100)
    plt.show()
def fig_7_2():
    """Plot average RMS error of 1-step TD on the random walk as a function of
    the step size (simplified variant of Figure 7.2)."""
    fig, ax = plt.subplots()
    ax.set_title('Figure 7.2')
    env = RandomWalk(n_states=FIG_7_2_N_STATES)
    pi = {(a, s): 1.0 for s in env.states for a in env.moves_d[s]}
    true_vals = true_values(env.n_states)
    alphas = np.linspace(0, 1, 11)
    for n in [1]:
        print(f">> n={n}")
        errors = []
        for alpha in alphas:
            print(f"alpha={alpha}")
            total_err = 0
            alg = nStepTD(env, V_init=None, step_size=alpha, gamma=UND, n=n)
            for seed in range(FIG_7_2_N_RUNS):
                alg.reset()
                alg.seed(seed)
                alg.simple_td(pi, n_ep=FIG_7_2_N_EP)
                estimates = np.array(alg.get_value_list()[:-1])
                total_err += np.linalg.norm(estimates - true_vals)
            errors.append(total_err / FIG_7_2_N_RUNS)
        plt.plot(alphas, errors, label=f'n={n}')
    ax.set_xticks(np.linspace(0, 1, 6))
    ax.set_xlabel('Stepsize')
    ax.set_ylabel(
        f'Average RMS error ({FIG_7_2_N_STATES} states, first {FIG_7_2_N_EP} episodes)'
    )
    plt.legend()
    plt.show()
def init_random_walk(init_value, step_size=None):
    """Build a OneStepTD algorithm and an equiprobable policy for the random
    walk, with every non-absorbing state initialised to `init_value`."""
    env = RandomWalk()
    pi = {(a, s): 1.0 for s in env.states for a in env.moves}
    # V = 0 for the absorbing state, init_value everywhere else.
    values = [init_value for _ in env.states[:-1]] + [0]
    V_init = {state: values[i] for i, state in enumerate(env.states)}
    alg = OneStepTD(env, V_init=V_init, step_size=step_size, gamma=UNDISCOUNTED)
    return alg, pi
def run_random_walks(ax, ex_7_2=False, show=True, extra_label='', dashed=False,
                     n_runs=FIG_7_2_N_RUNS, n_states=FIG_7_2_N_STATES,
                     left_rew=-1, true_vals=None, V_init=None):
    """Parameter study of n-step TD on the random walk, drawn onto `ax`.

    For each n in 1, 2, 4, ..., FIG_7_2_MAX_N, sweeps the step size alpha and
    plots the RMS error averaged over runs and the first FIG_7_2_N_EP episodes.

    Parameters:
        ax: matplotlib axes to decorate with ticks and labels.
        ex_7_2: switch forwarded to nStepTD; also keeps alpha_max at 1.
        show: unused in this body — TODO confirm whether callers rely on it.
        extra_label, dashed: legend text prefix and line style.
        n_runs, n_states, left_rew: experiment size and left-terminal reward.
        true_vals: reference values; defaults to the analytic linear values.
        V_init: initial value function forwarded to nStepTD.
    """
    # n values: powers of two up to FIG_7_2_MAX_N.
    n_l = [2**k for k in range(int(np.log(FIG_7_2_MAX_N) / np.log(2)) + 1)]
    env = RandomWalk(n_states=n_states, r_l=left_rew)
    pi = {(a, s): 1.0 for s in env.states for a in env.moves_d[s]}
    # True values are linear in the state index when not supplied by the caller.
    true_vals = np.linspace(-1, 1, env.n_states + 2)[1:-1] if true_vals is None else true_vals
    alg = nStepTD(env, V_init=V_init, step_size=None, gamma=UND, n=n_l[0], ex_7_2=ex_7_2)
    for n in n_l:
        alg.n = n
        print(f">> n={n}")
        err_l = []
        # Large n diverges at large alpha, so the alpha range shrinks for n > 16.
        alpha_max = 1 if (n <= 16 or ex_7_2) else 1 / (np.log(n // 8) / np.log(2))
        alpha_l = np.linspace(0, alpha_max, 31)
        for alpha in alpha_l:
            alg.step_size = alpha
            print(f"alpha={alpha}")
            err_sum = 0
            for seed in range(n_runs):
                alg.reset()
                alg.seed(seed)
                # RMS error is accumulated after every single episode.
                for ep in range(FIG_7_2_N_EP):
                    alg.pol_eval(pi, n_ep=1)
                    v_arr = np.array(alg.get_value_list()[:-1])
                    err_sum += np.sqrt(
                        np.sum((v_arr - true_vals)**2) / env.n_states)
            err_l.append(err_sum / (n_runs * FIG_7_2_N_EP))
        plt.plot(alpha_l, err_l, label=f'{extra_label} n={n}',
                 linestyle='dashed' if dashed else None)
    ax.set_xticks(np.linspace(0, 1, 6))
    yticks = np.linspace(0.25, 0.55, 6)
    ax.set_yticks(yticks)
    ax.set_ylim([min(yticks), max(yticks)])
    ax.set_xlabel('Stepsize')
    ax.set_ylabel(
        f'Average RMS error ({env.n_states} states, first {FIG_7_2_N_EP} episodes)'
    )
def example_randomwalk():
    """Train two 1-step SARSA agents on the 19-state random walk — one with the
    equiprobable policy, one eps-greedy — and print their Q tables."""
    env = RandomWalk(19)
    agent = Sarsa(env, env.init(), 1)
    agent2 = Sarsa(env, env.init(), 1)
    # Both agents use discount = 0.9 and step size = 0.1 for 100 episodes.
    num_episode = 100
    for _ in range(num_episode):
        agent.episode(0.9, 0.1)
        agent.init()
    agent2.set_policy_eps_greedy(0.5)
    for _ in range(num_episode):
        agent2.episode(0.9, 0.1)
        agent2.init()
    print('Equiprobable Q_SARSA[s][a]', agent.Q)
    print('Eps greedy Q_SARSA[s][a]', agent2.Q)
def fig_9_5():
    """Reproduce Figure 9.5: gradient Monte Carlo with polynomial vs Fourier
    bases of several orders on the 1000-state random walk, plotting the
    mu-weighted root value error per episode averaged over runs."""
    fig, ax = plt.subplots()
    env = RandomWalk()
    pi = {(EMPTY_MOVE, s): 1 for s in env.states}
    true_vals = get_true_vals(env, pi)
    for (feat, alp, label) in [(poly_feat, FIG_9_5_ALP_POL, 'polynomial basis'),
                               (four_feat, FIG_9_5_ALP_FOU, 'fourier basis')]:
        for base in FIG_9_5_BAS:
            # Value estimate on states normalised to [0, 1]; the closures over
            # `feat` and `base` are only used inside this loop iteration.
            def vhat(s, w):
                return np.dot(w, feat(s / 1000, base))
            def nab_vhat(s, w):
                return feat(s / 1000, base)
            # One weight per basis function (orders 0..base).
            w_dim = base + 1
            grad_mc = GradientMC(env, alp, w_dim)
            err_sum = np.zeros(FIG_9_5_N_EP)
            for seed in range(FIG_9_5_N_RUNS):
                print(f"seed={seed}")
                grad_mc.reset()
                grad_mc.seed(seed)
                err_per_ep = []
                for ep in range(FIG_9_5_N_EP):
                    if ep % 100 == 0 and ep > 0:
                        print(ep)
                    grad_mc.pol_eva(pi, vhat, nab_vhat, n_ep=1, gamma=FIG_9_5_G)
                    est_vals = [vhat(s, grad_mc.w) for s in env.states][:-1]
                    # Root value error weighted by the state distribution mu.
                    err_per_ep.append(
                        np.sqrt(
                            np.dot(grad_mc.mu[:-1],
                                   (est_vals - true_vals[:-1])**2)))
                err_sum += err_per_ep
            plt.plot(err_sum / FIG_9_5_N_RUNS, label=f'{label}, n={base}')
    plt.legend()
    plot_figure(ax, 'Figure 9.5', [0, 5000], [0, 5000], "Episodes",
                [0, 0.1, 0.2, 0.3, 0.4], ['0', '0.1', '0.2', '0.3', '0.4'],
                f"Square-Root\nValue Error\n({FIG_9_5_N_RUNS} runs)",
                labelpad=30, font=MED_FONT, loc='lower left')
    fig.set_size_inches(20, 14)
    save_plot('fig9.5', dpi=100)
    plt.show()
def plot_value():
    """Plot random-walk state values estimated by n-step Sarsa for
    n = 1, 2, 4, 8, 16 against the true values."""
    num_episodes = 200
    discount = 1
    step_size = 0.1
    steps = [2 ** x for x in range(5)]
    value_list = [run_agent_value(num_episodes, discount, step_size, n)
                  for n in steps]
    mdp = RandomWalk(19)
    # Ground-truth state values.
    gt_v = mdp.value_equiprobable(discount)
    plt.plot(range(1, 20), gt_v[1:-1], 'ro-', label="True Value")
    colors = ["y", "b", "g", "m", "c"]
    for i, value in enumerate(value_list):
        plt.plot(range(1, 20), value[1:-1], 'o-', color=colors[i],
                 label="{}-step SARSA".format(2 ** i))
    plt.legend(loc="upper left")
    plt.xlabel("State")
    plt.title("Value estimation of n-step Sarsa after {} episodes".format(num_episodes))
    plt.show()
def main():
    """Root function: repeatedly generate and display random walks until the
    user declines to continue."""
    while True:
        walk = RandomWalk()
        walk.fill_walk()
        fig, ax = plt.subplots()
        # Colour points from first to last position.
        indices = range(walk.num_points)
        ax.scatter(walk.x_values, walk.y_values, s=5, c=indices,
                   cmap=plt.cm.Blues, edgecolors='none')
        # Highlight start (green) and end (red).
        ax.scatter(0, 0, c='green', s=50)
        ax.scatter(walk.x_values[-1], walk.y_values[-1], c='red', s=50)
        plt.show()
        keep_running = input('Do you want to make another walk? (y/n): ')
        if keep_running.lower() == 'n':
            break
def fig_7_2():
    """Reproduce Figure 7.2: average RMS error of n-step TD on the random walk
    as a function of the step size, for n = 1, 2, 4, ..., 512."""
    fig, ax = plt.subplots()
    ax.set_title('Figure 7.2')
    # n values: powers of two up to 512.
    n_l = [2**k for k in range(int(np.log(512) / np.log(2)) + 1)]
    env = RandomWalk(n_states=FIG_7_2_N_STATES)
    pi = {(a, s): 1.0 for s in env.states for a in env.moves_d[s]}
    # True values are linear in the state index.
    true_vals = np.linspace(-1, 1, env.n_states + 2)[1:-1]
    alg = nStepTD(env, V_init=None, step_size=None, gamma=UND, n=None)
    for n in n_l:
        alg.n = n
        print(f">> n={n}")
        err_l = []
        # Large n diverges at large alpha, so the alpha range shrinks for n > 16.
        alpha_max = 1 if n <= 16 else 1 / (np.log(n // 8) / np.log(2))
        alpha_l = np.linspace(0, alpha_max, 31)
        for alpha in alpha_l:
            alg.step_size = alpha
            print(f"alpha={alpha}")
            err_sum = 0
            for seed in range(FIG_7_2_N_RUNS):
                alg.reset()
                alg.seed(seed)
                # RMS error is accumulated after every single episode.
                for ep in range(FIG_7_2_N_EP):
                    alg.pol_eval(pi, n_ep=1)
                    v_arr = np.array(alg.get_value_list()[:-1])
                    err_sum += np.sqrt(
                        np.sum((v_arr - true_vals)**2) / FIG_7_2_N_STATES)
            err_l.append(err_sum / (FIG_7_2_N_RUNS * FIG_7_2_N_EP))
        plt.plot(alpha_l, err_l, label=f'n={n}')
    ax.set_xticks(np.linspace(0, 1, 6))
    yticks = np.linspace(0.25, 0.55, 6)
    ax.set_yticks(yticks)
    ax.set_ylim([min(yticks), max(yticks)])
    ax.set_xlabel('Stepsize')
    ax.set_ylabel(
        f'Average RMS error ({FIG_7_2_N_STATES} states, first {FIG_7_2_N_EP} episodes)'
    )
    plt.legend(fontsize='x-small')
    plt.show()
def fig_9_1():
    """Reproduce Figure 9.1: gradient Monte Carlo with state aggregation on
    the random walk, overlaying the state-visit distribution mu."""
    env = RandomWalk()
    pi = {(EMPTY_MOVE, s): 1 for s in env.states}
    true_vals = get_true_vals(env, pi)
    grad_mc = GradientMC(env, FIG_9_1_ALP, FIG_9_1_W_DIM)
    grad_mc.seed(0)
    grad_mc.pol_eva(pi, vhat_st_agg, nab_vhat_st_agg, FIG_9_1_N_EP, FIG_9_1_G)
    # Drop the terminal state from the plotted estimates.
    est_vals = [vhat_st_agg(s, grad_mc.w) for s in env.states][:-1]
    fig, value_ax = plt.subplots()
    value_ax.plot(est_vals, 'b', label='Approximate MC value vhat')
    value_ax.plot(true_vals, 'r', label='True value v_pi')
    plot_figure(value_ax, 'Figure 9.1', [0, 999], [1, 1000], 'State',
                [-1, 0, 1], [-1, 0, 1], '\n\nValue\nScale')
    # Second y-axis for the state distribution.
    dist_ax = value_ax.twinx()
    dist_ax.set_yticks([0, 0.0017, 0.0137])
    dist_ax.set_ylabel('Distribution\nscale', rotation=0, fontsize=MED_FONT)
    dist_ax.plot(grad_mc.mu[:-1], 'm', label='State distribution mu')
    plt.legend()
    fig.set_size_inches(20, 14)
    save_plot('fig9.1', dpi=100)
    plt.show()
def run_agent_RMS_Q(num_runs, num_episodes, discount, step_size, step=1):
    """Run an n-step SARSA agent and track the RMS error of the Q values for
    the left and right actions separately.

    Returns (left, right): two arrays of length num_episodes + 1 averaged over
    num_runs; index 0 holds the error of the initial Q estimate.
    """
    mdp = RandomWalk(19)
    s = mdp.init()
    # Ground-truth Q values, split per action column.
    gt_Q = np.asarray(mdp.Q_equiprobable(discount)[1:-1])
    gt_Q_left = gt_Q[:, 0]
    gt_Q_right = gt_Q[:, 1]
    # Initial value estimate: 0.5 everywhere, 0 at the terminals.
    v = np.asarray([0.5] * mdp.num_states())
    v[0], v[-1] = 0.0, 0.0
    # Convert to an initial Q table once (the original computed value_to_Q
    # twice, once per column).
    init_Q = np.asarray(mdp.value_to_Q(v, discount)[1:-1])
    init_Q_left = init_Q[:, 0]
    init_Q_right = init_Q[:, 1]
    # Per-episode RMS error arrays and their running sums over runs.
    rms_err_left = np.zeros(num_episodes + 1)
    rms_err_right = np.zeros(num_episodes + 1)
    sum_rms_err_left = np.zeros(num_episodes + 1)
    sum_rms_err_right = np.zeros(num_episodes + 1)
    rms_err_left[0] = np.sqrt(np.mean(np.square(init_Q_left - gt_Q_left)))
    rms_err_right[0] = np.sqrt(np.mean(np.square(init_Q_right - gt_Q_right)))
    # Create the n-step SARSA agent.
    agent = Sarsa(mdp, s, step)
    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            Q = np.asarray(agent.Q[1:-1])
            rms_err_left[i + 1] = np.sqrt(np.mean(np.square(Q[:, 0] - gt_Q_left)))
            rms_err_right[i + 1] = np.sqrt(np.mean(np.square(Q[:, 1] - gt_Q_right)))
        sum_rms_err_left += rms_err_left
        sum_rms_err_right += rms_err_right
        # Reset Q after each independent run.
        agent.reset_Q()
    # Averaged over num_runs.
    return sum_rms_err_left / num_runs, sum_rms_err_right / num_runs
def decay_agent(n=1, alpha=0.5, episodes=100, ep_start=30, decay=0.7):
    """Run an n-step Q(sigma) agent with geometric sigma decay and return the
    RMS error of the final Q table averaged over 250 runs.

    Parameters:
        n: the n of the n-step method.
        alpha: step size.
        episodes: episodes per run.
        ep_start: episode index after which sigma starts decaying.
        decay: multiplicative decay factor applied to sigma each episode.
    """
    mdp = RandomWalk(19, -1)
    s = mdp.init()
    num_runs = 250
    num_episodes = episodes
    discount = 1.0
    step_size = alpha
    steps = n
    # Reference Q values for the undiscounted equiprobable policy.
    Q_opt = mdp.Q_equiprobable(1.0)
    rms_err = 0.0
    # Create the n-step Q(sigma) agent; sigma starts at 1.0 (full sampling).
    agent = QSigma(mdp, 1.0, s, steps)
    agent.set_policy_equiprobable()
    for run in range(num_runs):
        sqerr = 0.0
        # NOTE(review): writes the private attribute _Psigma directly —
        # presumably the probability of sampling sigma=1; confirm with QSigma.
        agent._Psigma = 1.0
        for i in range(num_episodes):
            # Start decaying sigma only after ep_start episodes.
            if i > ep_start:
                agent._Psigma *= decay
            agent.episode(discount, step_size)
            agent.init()
        # Incremental (running-mean) computation of the mean squared error
        # over all state-action pairs: sqerr += (1/count) * (sample - sqerr).
        count = 0
        for s in range(mdp.num_states()):
            for a in range(mdp.num_actions(s)):
                count += 1
                sqerr += (1 / count) * (
                    (agent.Q[s][a] - Q_opt[s][a])**2 - sqerr)
        rms_err += sqerr**0.5
        # Reset Q after each independent run.
        agent.reset_Q()
    rms_err /= num_runs
    return rms_err
def example_randomwalk():
    """An example on random walk MDP: train two 3-step TreeBackup agents
    (equiprobable and eps-greedy) and compare their Q tables against DP."""
    env = RandomWalk(19, -1)
    agent = TreeBackup(env, env.init(), 3)
    agent2 = TreeBackup(env, env.init(), 3)
    # Both agents run 1000 episodes with discount = 0.9 and step size = 0.1.
    num_episode = 1000
    for _ in range(num_episode):
        agent.episode(0.9, 0.1)
        agent.init()
    agent2.set_policy_eps_greedy(0.1)
    for _ in range(num_episode):
        agent2.episode(0.9, 0.1)
        agent2.init()
    print('Q_DP[s][a] ', env.Q_equiprobable(0.9))
    print('Q_eps_greedy[s][a] ', env.Q_eps_greedy(0.1, 0.9))
    print('Equiprobable Q_TreeBackup[s][a]', agent.Q)
    print('Eps greedy Q_TreeBackup[s][a]', agent2.Q)
def example_randomwalk():
    """An example on random walk MDP: train two 3-step Q(sigma) agents
    (equiprobable and eps-greedy) and compare their Q tables against DP."""
    env = RandomWalk(19, -1)
    # QSigma(env, Psigma=0.5, init_state=env.init(), steps=3)
    agent = QSigma(env, 0.5, env.init(), 3)
    agent2 = QSigma(env, 0.5, env.init(), 3)
    # Both agents run 1000 episodes with discount = 0.9 and step size = 0.1.
    num_episode = 1000
    for _ in range(num_episode):
        agent.episode(0.9, 0.1)
        agent.init()
    agent2.set_policy_eps_greedy(0.1)
    for _ in range(num_episode):
        agent2.episode(0.9, 0.1)
        agent2.init()
    print('Q_DP[s][a] ', env.Q_equiprobable(0.9))
    print('Q_eps_greedy[s][a] ', env.Q_eps_greedy(0.1, 0.9))
    print('Equiprobable Q_Q(sigma)[s][a]', agent.Q)
    print('Eps greedy Q_Q(sigma)[s][a]', agent2.Q)
# /* Init TCP server, __hosting process and request function */ mainlogger.info('Initialising TCP server...') tcp = TCP_server(me.enode, me.ip, tcpPort) # /* Init E-RANDB __listening process and transmit function mainlogger.info('Initialising RandB board...') erb = ERANDB(erbDist, me.id, erbtFreq) # /* Init Ground-Sensors, __mapping process and vote function */ mainlogger.info('Initialising ground-sensors...') gs = GroundSensor(gsFreq) # /* Init Random-Walk, __walking process */ mainlogger.info('Initialising random-walk...') rw = RandomWalk(rwSpeed) # /* Init LEDs */ rgb = RGBLEDs() # List of submodules --> iterate .start() to start all submodules = [w3.geth.miner, tcp, erb, gs, rw, pb] # /* Define Main-modules */ ####################################################################### # The 4 Main-modules: # "Estimate" Rate 1Hz) queries groundsensor and generates a robot estimate (opinion) # "Buffer" (Rate 1Hz) queries RandB to get neighbor identities and add/remove on Geth # "Vote" (Rate 1/45Hz) communicate with geth to send votes every 45s # "Event" (Every block) when a new block is detected make blockchain queries/sends/log block data
import matplotlib.pyplot as plt

from randomwalk import RandomWalk

# Repeatedly generate and display random walks until the user opts out.
while True:
    rw_visual = RandomWalk()
    rw_visual.fill_walk()

    # c=point_numbers: gradient from first position to ending position
    point_numbers = list(range(rw_visual.numpoints))
    plt.figure(figsize=(10, 6))
    plt.scatter(rw_visual.x_values, rw_visual.y_values, c=point_numbers,
                cmap=plt.cm.Blues, edgecolor='none', s=10)
    # Highlight the start (green) and end (red) of the walk.
    plt.scatter(rw_visual.x_values[0], rw_visual.y_values[0], c='green', s=10)
    plt.scatter(rw_visual.x_values[-1], rw_visual.y_values[-1], c='red', s=10)

    # Hide the axes.  Since Matplotlib 3.4, plt.axes() with no arguments
    # always creates a brand-new (empty) Axes that would cover the scatter
    # plot — use the current axes instead.
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.show()

    user_input = input('Do you want to make another plot? y/n ')
    if user_input.lower() == 'n':
        break
import time
from erandb import ERANDB
from randomwalk import RandomWalk
from rgbleds import RGBLEDs
from groundsensor import GroundSensor

# The robot id is provisioned on the Pi-puck's boot partition.
# NOTE(review): `id` shadows the builtin and the file handle is never closed
# — consider a `with` block and a different name.
id = open("/boot/pi-puck_id", "r").read().strip()

# Initialise and start the hardware submodules.
print('Initialising E-RANDB board...')
erb = ERANDB(100)
print('Initialising Random-Walk...')
rw = RandomWalk(500)
print('Initialising Ground-Sensors...')
gs = GroundSensor(100)
# /* Init LEDs */
rgb = RGBLEDs()
erb.start()
rw.start()
gs.start()

# Count range-and-bearing messages received from neighbours.
counts = 0
newValues = 0  # NOTE(review): unused in this chunk — confirm against the rest of the file
while 1:
    # Broadcast own id, then count any newly heard neighbour ids.
    erb.transmit(id)
    for newId in erb.getNew():
        counts += 1
        #print(str(newId))
import matplotlib.pyplot as plt

from randomwalk import RandomWalk

# Repeatedly generate and display 50000-point random walks until the user quits.
while True:
    rwalk = RandomWalk(50000)
    rwalk.walk()
    plt.scatter(rwalk.x_values, rwalk.y_values, c=rwalk.x_values,
                cmap=plt.cm.Blues, edgecolor='none', s=2)

    # Marking the start and end points
    plt.scatter(0, 0, c='green', s=20)
    plt.scatter(rwalk.x_values[-1], rwalk.y_values[-1], c='red', s=20)

    # Removing axes.  Since Matplotlib 3.4, plt.axes() with no arguments
    # always creates a new (empty) Axes that hides the scatter plot — operate
    # on the current axes instead.
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.show()

    active = input("Make another walk (y/n)? ")
    if active.lower() == 'n':
        break
import matplotlib.pyplot as plt
from randomwalk import RandomWalk

# NOTE(review): this loop body appears to continue beyond this chunk — there
# is no plt.show() or exit condition visible here; confirm against the full file.
while True:
    rw = RandomWalk(200)
    rw.fill_walk()

    #set the size of the plotting window
    plt.figure(dpi=128, figsize=(10, 6))  # dpi = resolution of the figure

    # Colour points in order of creation, first to last.
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                cmap=plt.cm.Blues, edgecolor='none', s=1)

    #graph first and last points bigger
    plt.scatter(0, 0, c='green', edgecolors='none', s=100)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolors='none', s=100)
def fig_9_10():
    """Reproduce Figure 9.10: gradient Monte Carlo with tile coding vs state
    aggregation on the random walk, plotting the mu-weighted root value error
    per episode averaged over runs."""
    fig, ax = plt.subplots()
    env = RandomWalk()
    pi = {(EMPTY_MOVE, s): 1 for s in env.states}
    true_vals = get_true_vals(env, pi)
    def feat_tile(s, offset, st_per_agg):
        # Index of the tile containing state s for a tiling shifted by offset.
        if s < offset:
            return 0
        return (s - offset) // st_per_agg + 1
    def feat(s, st_per_agg, n_tiles):
        # Boolean feature vector: one active feature per tiling.
        dx = st_per_agg // n_tiles if n_tiles > 1 else 0
        ft_per_til = FIG_9_10_TOT_ST // st_per_agg + (n_tiles > 1)
        feat_arr = np.zeros(ft_per_til * n_tiles)
        for n in range(n_tiles):
            idx_min = n * ft_per_til
            # With a single tiling there is no offset tile, so shift by one.
            s_id = feat_tile(s, n * dx, st_per_agg) - (n_tiles == 1)
            feat_arr[idx_min + s_id] = True
        return feat_arr.astype(bool)
    for (idx, n_tiles) in enumerate(FIG_9_10_TIL_L):
        # Closures over n_tiles; only used within this loop iteration.
        def feat_vec(s):
            return feat(s, FIG_9_10_ST_AGG, n_tiles)
        def vhat(s, w):
            return np.sum(w[feat_vec(s)]) if s < FIG_9_10_TOT_ST else 0
        def nab_vhat(s, w):
            return feat_vec(s) if s < FIG_9_10_TOT_ST else 0
        w_dim = (FIG_9_10_TOT_ST // FIG_9_10_ST_AGG + (n_tiles > 1)) * n_tiles
        grad_mc = GradientMC(env, FIG_9_10_ALP_TIL_L[idx], w_dim)
        print(f"w_dim={w_dim}, alpha={grad_mc.a}, n_tiles={n_tiles}")
        err_sum = np.zeros(FIG_9_10_N_EP)
        for seed in range(FIG_9_10_N_RUNS):
            print(f"seed={seed}")
            grad_mc.reset()
            grad_mc.seed(seed)
            err_per_ep = []
            for ep in range(FIG_9_10_N_EP):
                if ep % 100 == 0 and ep > 0:
                    print(ep)
                grad_mc.pol_eva(pi, vhat, nab_vhat, n_ep=1, gamma=FIG_9_10_G)
                est_vals = [vhat(s, grad_mc.w) for s in env.states][:-1]
                # Root value error weighted by the state distribution mu.
                err_per_ep.append(
                    np.sqrt(
                        np.dot(grad_mc.mu[:-1],
                               (est_vals - true_vals[:-1])**2)))
            err_sum += np.array(err_per_ep)
        # NOTE(review): the legend branch looks inverted — multiple tilings are
        # labelled 'State Aggregation' and a single tiling 'Tile Coding';
        # confirm intended naming before relying on the legend.
        plt.plot(err_sum / FIG_9_10_N_RUNS,
                 label=(('State Aggregation' if (n_tiles > 1) else 'Tile Coding') +
                        f' ({n_tiles} tile{"s" * (n_tiles > 1)})'))
    plt.legend()
    plot_figure(ax, 'Figure 9.10', [0, 5000], [0, 5000], "Episodes",
                [0, 0.1, 0.2, 0.3, 0.4], ['0', '0.1', '0.2', '0.3', '0.4'],
                f"Square-Root\nValue Error\n({FIG_9_10_N_RUNS} runs)",
                labelpad=30, font=MED_FONT, loc='lower left')
    fig.set_size_inches(20, 14)
    save_plot('fig9.10', dpi=100)
    plt.show()
import os
import re
import subprocess
from datetime import datetime
from randomwalk import RandomWalk

# Generate a random walk, export its data, plot, and sound into a
# timestamped output directory, then convert the images to an AVI.
print("Generating a random walk...")
random_walk = RandomWalk()
length = random_walk.length
steps = random_walk.config.steps
# Timestamp with all non-digits replaced by underscores,
# e.g. "2021_01_02_03_04_05_678901".
date_time = re.sub('[^0-9]', '_', str(datetime.now()))
output_directory = "{0}/{1}".format(random_walk.config.output_directory, date_time)
os.mkdir(output_directory)
data_filename = output_directory + "/walk.txt"
avi_filename = output_directory + "/walk.avi"
print("A random walk generated successfully. Exporting data to '{0}' file...".
      format(data_filename))
random_walk.export_walk(data_filename)
print("Exporting the plot to '{0}/' directory...".format(output_directory))
random_walk.make_plot(output_directory, steps)
print(
    "Generating a sound wave to '{0}/' directory...".format(output_directory))
random_walk.make_sound(output_directory, steps)
print("Converting images to '{0}' avi file...".format(avi_filename))
# NOTE(review): this call continues beyond this chunk — its arguments are not
# visible here.
subprocess.Popen(
# Experiment grid for n-step Q(sigma) on the 19-state random walk.
# NOTE(review): the nested loops at the bottom continue beyond this chunk —
# their bodies are not visible here.
resume = False
n_runs = 100
n_eps = 100
ns = [1, 3, 5]
alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
#sigmas = [0.0, 0.25, 0.5, 0.75, 1.0, -0.95]
sigmas = [-0.9]
# def __init__(self, steps=1, init_sigma=1.0,step_size=0.1, beta=1.0):
gamma = 0.9
max_steps = 10000
# WANT RMS ERROR
mdp = RandomWalk(19, -1)
# Reference Q values with the terminal rows zeroed out, then trimmed.
Q_opt = mdp.Q_equiprobable(gamma)
Q_opt[0] = [0, 0]
Q_opt[20] = [0, 0]
Q_opt = np.array(Q_opt)[1:-1]
if resume:
    # Resume from a previously pickled result tensor.
    R_final = pickle.load(open('rwQsig_R_final.p', 'rb'))
else:
    # NOTE(review): nested-list repetition like [[0.0]*n]*m aliases inner
    # lists, but np.array copies the data so the resulting array is safe.
    R_final = np.array(
        [[[[[0.0] * n_eps] * n_runs] * len(sigmas)] * len(alphas)] * len(ns))
# R_final[steps, alpha, sigma, run, ep]
for n, steps in enumerate(ns):
    for a, alpha in enumerate(alphas):
        for s, sigma in enumerate(sigmas):
import matplotlib.pyplot as plt

from randomwalk import RandomWalk

# Make a random walk, and plot the points.
while True:
    # Create the walk object and generate the points.
    rw = RandomWalk(50000)
    rw.fill_walk()

    # Generate scatter, coloured from first position to last.
    point_numbers = list(range(rw.num_points))
    plt.scatter(rw.x_values, rw.y_values, c=point_numbers,
                cmap=plt.cm.Blues, edgecolor='none', s=1)

    # Emphasize the first and last points.
    plt.scatter(0, 0, c='green', edgecolor='none', s=100)
    plt.scatter(rw.x_values[-1], rw.y_values[-1], c='red', edgecolor='none', s=100)
    plt.show()

    keep_running = input("Make another walk? (y/n): ")
    # Normalise case so 'N' also exits (previously only lowercase 'n' worked,
    # unlike the sibling scripts in this project).
    if keep_running.lower() == 'n':
        break
# NOTE(review): `hist`, `results` and `result_frequency` are created earlier
# in this file, outside this chunk — confirm against the full script.
hist.y_title = 'Frequency of Result'
hist.add('D6 + D6 + D6', result_frequency)
hist.render_to_file('visual_3.svg')

# Matplotlib to create a die-rolling visualisation
plt.figure(figsize=(10, 6))
plt.scatter([x for x in range(2, max(results) + 1)], result_frequency, s=15)
# NOTE(review): the label says two dice while the data above is for three
# (D6 + D6 + D6) — confirm which is intended.
plt.xlabel('D6 + D6', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.show()

# Pygal to create a visualisation for random walk (count how many times in the same coordinate)
journey = RandomWalk(numpoints=50)
journey.fill_walk()
sorted_journey_set = sorted(set(journey.x_values))
same_coordinates_count = []
# Count how often each x coordinate is visited.
for x in sorted_journey_set:
    same_coordinates_count.append(journey.x_values.count(x))
hist = pygal.Bar()
hist.x_title = 'x coordinate'
hist.y_title = 'count'
hist.x_labels = [str(x) for x in sorted_journey_set]
hist.add('', same_coordinates_count)
hist.render_to_file('randomwalk.svg')