def plot_3d_trajectory(export=False, filename='q2-3d-trajectory.pdf'):
    """Simulate the 3d-system for 50 seconds and make a 3D plot of the
    trajectory.

    The first simulated point is drawn as a green marker and the whole
    trajectory as a line.

    Arguments:
    export -- indicate if the plot should be PDF exported and saved
              (default False)
    filename -- exported PDF filename, appended to the module-level PATH
                (default q2-3d-trajectory.pdf)
    """
    xs, ys, zs = simulate(a=10, r=28, b=8 / 3, mu_0=(1, 1, 1),
                          sigma_0=math.sqrt(0.001), dt=0.001,
                          sigma_u=math.sqrt(0.0000001), Gamma=np.eye(3),
                          t_tot=50)

    fig = plt.figure()
    # Figure.gca() stopped accepting keyword arguments (deprecated in
    # Matplotlib 3.4, removed in 3.6), so the 3D axes are created explicitly.
    # add_subplot(111, projection='3d') also works on older releases.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')

    # Starting point first, then the full trajectory.
    ax.scatter(xs[0], ys[0], zs[0], color='g')
    ax.plot(xs, ys, zs)

    plt.show()
    if export:
        fig.savefig(PATH + filename, bbox_inches='tight', pad_inches=0)
def example():
    """Return the settings dictionary of the basic model example.

    The model has two exogenous variables (X1, X2) and three endogenous
    variables (Y1, Y2, Y3), of which only Y3 is manifest. Data for the model
    are generated with ``utils.simulate`` and stored under the keys ``xdat``
    and ``ymdat`` of the returned dictionary.
    """
    X1, X2, Y1, Y2, Y3 = symbols(["X1", "X2", "Y1", "Y2", "Y3"])

    def define_equations(X1, X2):
        # Equations in topological order; Y1 and Y2 are the symbols created
        # above and are picked up from the enclosing scope.
        return X1, X2 + 2 * Y1**2, Y1 + Y2

    model_dat = {
        # equations in topological order
        "define_equations": define_equations,
        # exogenous variables in desired order
        "xvars": [X1, X2],
        # endogenous variables in topological order
        "yvars": [Y1, Y2, Y3],
        # manifest endogenous variables
        "ymvars": [Y3],
        # final variable of interest, for mediation analysis
        "final_var": Y3,
        # show first individual effects
        "show_nr_indiv": 3,
        # estimate equation biases, for model validation
        "estimate_bias": True,
        # regularization parameter, is estimated if None
        "alpha": None,
        # effective degrees of freedom, corresponding to alpha
        "dof": None,
        # output directory path
        "dir_path": "output/",
    }

    # simulate data
    import utils

    model_dat.update(
        xmean_true=[3, 2],  # mean of exogeneous data
        sigx_theo=1,  # true scalar error variance of xvars
        sigym_theo=1,  # true scalar error variance of ymvars
        rho=0.2,  # true correlation within y and within x vars
        tau=200,  # nr. of simulated observations
    )

    # The simulated arrays can be persisted and reloaded with numpy, e.g.
    #   savetxt("data/xdat.csv", xdat, delimiter=",")
    #   savetxt("data/ymdat.csv", ymdat, delimiter=",")
    #   xdat = loadtxt("data/xdat.csv", delimiter=",").reshape(len(model_dat["xvars"]), -1)
    #   ymdat = loadtxt("data/ymdat.csv", delimiter=",").reshape(len(model_dat["ymvars"]), -1)

    # exogenous data, manifest endogenous data
    model_dat["xdat"], model_dat["ymdat"] = utils.simulate(model_dat)

    return model_dat
def simulate(h, histtraj):
    """Sample one playout starting at history ``h`` and record it.

    Simulates strategies and adds data to the replay memory: for every step a
    tuple ``(history, action, next_history, reward)`` is appended to
    ``histtraj``. The reward is 0.0 for every step except the one reaching a
    terminal history, where it is the first entry of ``game.simulate``.

    ``game`` and ``self`` are not parameters; they are resolved from the
    surrounding scope.
    """
    while True:
        actor = game.playerOfHist[h]
        if actor == 2:
            # player 2 draws from the chance distribution stored on the game
            policy = game.chanceprob[h]
        else:
            policy = self.stgy[actor][game.Hist2Iset[actor][h]]

        a = np.random.choice(game.nactsOnHist[h], p=policy)
        succ = game.histSucc[h][a]

        if game.isTerminal[succ]:
            payoff = game.simulate(succ)
            histtraj.append((h, a, succ, payoff[0]))
            return

        histtraj.append((h, a, succ, 0.0))
        h = succ
def plot_ekf(filename):
    """Run the EKF on a simulated trajectory and plot the results.

    Arguments:
    filename -- indexable with at least four entries: [0], [1], [2] are
                forwarded to plot_trajectory for the x, y and z plots;
                [3] is the file name of the error plot, which is only saved
                (under PATH) when it is not None
    """
    # Time settings
    t_tot, ts, dt = 16, 0.01, 0.001
    L = int(ts / dt)

    # Initial distribution and noise levels
    mu_0 = 1
    sigma_0 = math.sqrt(0.001)
    sigma_u = math.sqrt(0.01)
    sigma_m = math.sqrt(1)

    # System parameters
    a, r, b = 10, 28, 8 / 3
    Gamma = np.eye(3)

    xs, ys, zs = simulate(t_tot, mu_0, sigma_0, a, r, b, dt, sigma_u, Gamma)
    xs_m = measure(xs, L, sigma_m)
    mu, cov = ekf(a, r, b, dt, sigma_u, Gamma, mu_0, sigma_0, ts, t_tot,
                  xs_m, sigma_m)
    print(cov[int(5 / ts)][0, 0])

    # Only the first coordinate has measurements to overlay.
    plot_trajectory(L, t_tot, dt, xs, xs_m, mu[:, 0], 'x', filename[0])
    plot_trajectory(L, t_tot, dt, ys, None, mu[:, 1], 'y', filename[1])
    plot_trajectory(L, t_tot, dt, zs, None, mu[:, 2], 'z', filename[2])

    # Error function
    fig, ax = plt.subplots()

    # Simulated state picked every L samples, one column per coordinate.
    x_real = np.empty((int(t_tot / ts) + 1, 3))
    for col, series in enumerate((xs, ys, zs)):
        x_real[:, col] = series[::L]

    steps = np.arange(0, int(t_tot / dt) + 1, 1)
    err = np.linalg.norm(x_real - mu, axis=1)
    ax.plot(steps[::L], err, 'b', label="Global error")
    ax.axhline(np.mean(err), color='b', linestyle='dashed',
               label="Mean global error")

    err_x = np.abs(x_real[:, 0] - mu[:, 0])
    ax.axhline(np.mean(err_x), color='g', linestyle='dashed',
               label="Mean error on x")
    ax.set_ylim(0, 6)

    legend = ax.legend(loc='upper right')
    for text in legend.get_texts():
        text.set_fontsize('large')
    for line in legend.get_lines():
        line.set_linewidth(1.5)

    error_file = filename[3]
    if error_file is not None:
        fig.savefig(PATH + error_file, bbox_inches='tight', pad_inches=0)
    plt.show()
def example3():
    """Return the settings dictionary of model example 3.

    This example is difficult to estimate: if just Y3 is manifest, huge
    regularization is required and direct effects are strongly biased
    (if all yvars are manifest, just slight regularization is required and
    some standard errors are huge).
    """
    X1, Y1, Y2, Y3 = symbols(["X1", "Y1", "Y2", "Y3"])

    def define_equations(X1):
        # Y1 and Y2 are the symbols created above (enclosing scope).
        return 2 * X1, -X1, Y1 + Y2

    model_dat = {
        "define_equations": define_equations,
        "xvars": [X1],
        "yvars": [Y1, Y2, Y3],
        "ymvars": [Y3],
        "final_var": Y3,
        "show_nr_indiv": 3,
        "estimate_bias": True,
        "alpha": None,
        "dof": None,
        "dir_path": "output/",
    }

    # simulate data
    import utils

    model_dat.update(
        xmean_true=[3],
        sigx_theo=1,
        sigym_theo=1,
        rho=0.2,
        tau=200,
    )
    model_dat["xdat"], model_dat["ymdat"] = utils.simulate(model_dat)

    return model_dat
def example2():
    """Return the settings dictionary of model example 2.

    No regularization required, no latent variables: the single endogenous
    variable Y1 is manifest and equals X1.
    """
    X1, Y1 = symbols(["X1", "Y1"])

    def define_equations(X1):
        return [X1]

    model_dat = {
        "define_equations": define_equations,
        "xvars": [X1],
        "yvars": [Y1],
        "ymvars": [Y1],
        "final_var": Y1,
        "show_nr_indiv": 3,
        "estimate_bias": True,
        "alpha": None,
        "dof": None,
        "dir_path": "output/",
    }

    # simulate data
    import utils

    model_dat.update(
        xmean_true=[3],
        sigx_theo=1,
        sigym_theo=1,
        rho=0.2,
        tau=200,
    )
    model_dat["xdat"], model_dat["ymdat"] = utils.simulate(model_dat)

    return model_dat
def plot_mes_vs_real(export=False, filename='q2-mes-vs-real.pdf'):
    """Plot the simulated first coordinate together with its noisy
    measurements.

    The trajectory is simulated for 50 seconds with a step of 0.001 and one
    measurement is plotted every ``L = int(ts / dt)`` steps.

    Arguments:
    export -- indicate if the plot should be PDF exported and saved
              (default False)
    filename -- exported PDF filename, appended to the module-level PATH
                (default q2-mes-vs-real.pdf)
    """
    t_tot, dt, ts = 50, 0.001, 0.01
    L = int(ts / dt)

    xs, _, _ = simulate(a=10, r=28, b=8 / 3, mu_0=(1, 1, 1),
                        sigma_0=math.sqrt(0.001), dt=dt,
                        sigma_u=math.sqrt(0.0000001), Gamma=np.eye(3),
                        t_tot=t_tot)
    xs_m = measure(xs, L=L, sigma_m=1)

    fig, ax = plt.subplots()
    steps = np.arange(0, int(t_tot / dt) + 1, 1)
    ax.plot(steps, xs, 'b', label='First coordinate trajectory')
    # Every L-th step, leaving out the very last one.
    ax.plot(steps[:-1:L], xs_m, 'g.', label='Noisy measurements',
            markersize=4.0)

    legend = ax.legend(loc='upper right')
    for text in legend.get_texts():
        text.set_fontsize('large')
    for line in legend.get_lines():
        line.set_linewidth(1.5)

    plt.show()
    if export:
        fig.savefig(PATH + filename, bbox_inches='tight', pad_inches=0)
def main():
    """Compare the per-dimension distributions produced by the EKF and CSMC.

    A trajectory is simulated, measurements of its first coordinate are
    taken, and ``ekf_distribs`` / ``csmc_distribs`` are evaluated on those
    measurements. For each of the three dimensions the sorted values of both
    results are plotted against ``arange(n) / n`` on a shared figure.

    All settings (``ts``, ``dt``, ``t_tot``, ``mu_0``, ``sigma_0``, ``a``,
    ``r``, ``b``, ``sigma_u``, ``Gamma``, ``sigma_m``, ``n``, ``dimensions``)
    are read from names defined outside this function.
    """
    # Number of simulation steps between two measurements.
    L = int(ts / dt)
    xs, ys, zs = simulate(
        t_tot,
        mu_0,
        sigma_0,
        a,
        r,
        b,
        dt,
        sigma_u,
        Gamma,
    )
    xs_m = measure(xs, L, sigma_m)

    distribs_ekf = ekf_distribs(xs_m)
    distribs_csmc = csmc_distribs(xs_m)

    for k in range(3):
        C_ekf = np.sort(distribs_ekf[k])
        C_csmc = np.sort(distribs_csmc[k])
        # presumably each distribs_*[k] holds n values so that it lines up
        # with R - TODO confirm against ekf_distribs / csmc_distribs
        R = np.arange(n) / float(n)

        fig, ax = plt.subplots()
        ax.plot(C_ekf, R, label="{} distrib. from EKF".format(dimensions[k]))
        ax.plot(C_csmc, R, label="{} distrib. from CSMC".format(dimensions[k]))

        legend = ax.legend(loc='upper right')
        for label in legend.get_texts():
            label.set_fontsize('large')
        for label in legend.get_lines():
            label.set_linewidth(1.5)
        #fig.savefig(PATH + "distrib-{}.pdf".format(dimensions[k]),
        #            bbox_inches='tight', pad_inches=0)

    # NOTE(review): the flattened source does not show whether this call sat
    # inside the loop (one window at a time) or after it (all figures at
    # once); it is placed after the loop here - confirm.
    plt.show()
def plot_smc(a, r, b, dt, ts, t_tot, mu_0, sigma_0, sigma_u, Gamma, sigma_m,
             n, filename):
    """Run the classical SMC filter on a simulated trajectory and plot the
    results.

    Arguments:
    a, r, b -- system parameters, forwarded to simulate and classical_smc
    dt -- simulation time step
    ts -- time between two measurements (``L = int(ts / dt)`` steps)
    t_tot -- total simulated time
    mu_0, sigma_0 -- initial-state parameters, forwarded unchanged
    sigma_u, Gamma -- process-noise parameters, forwarded unchanged
    sigma_m -- measurement-noise parameter, forwarded to measure and
               classical_smc
    n -- last argument of classical_smc
    filename -- indexable with at least eight entries: [0], [1] histograms
                of x and y; [2] error plot (only saved, under PATH, when not
                None); [3], [4] particle plots; [5], [6], [7] trajectory
                plots of x, y and z
    """
    L = int(ts / dt)
    xs, ys, zs = simulate(
        t_tot,
        mu_0,
        sigma_0,
        a,
        r,
        b,
        dt,
        sigma_u,
        Gamma,
    )
    xs_m = measure(xs, L, sigma_m)
    x_tilde, y_tilde, z_tilde, x, y, z, wxs = classical_smc(
        a, r, b, dt, sigma_u, Gamma, mu_0, sigma_0, ts, t_tot, xs_m, sigma_m,
        n)

    # Histograms
    plot_hist(x, x_tilde, xs, dt, ts, filename[0])
    # The y histogram uses the y_tilde values returned by classical_smc; it
    # used to be handed x_tilde, copied from the call above.
    plot_hist(y, y_tilde, ys, dt, ts, filename[1])

    # Error function
    fig, ax = plt.subplots()

    # Simulated state picked every L samples, one column per coordinate.
    x_real = np.empty((int(t_tot / ts) + 1, 3))
    x_real[:, 0] = xs[::L]
    x_real[:, 1] = ys[::L]
    x_real[:, 2] = zs[::L]

    # Separate name so the system parameter `a` is not overwritten.
    steps = np.arange(0, int(t_tot / dt) + 1, 1)
    err = np.linalg.norm(x_real - wxs, axis=1)
    plt.plot(steps[::L], err, 'b', label="Global error")
    plt.axhline(np.mean(err), color='b', linestyle='dashed',
                label="Mean global error")

    err_x = np.abs(x_real[:, 0] - wxs[:, 0])
    plt.axhline(np.mean(err_x), color='g', linestyle='dashed',
                label="Mean error on x")
    #plt.ylim(0, 6)

    legend = ax.legend(loc='upper right')
    for label in legend.get_texts():
        label.set_fontsize('large')
    for label in legend.get_lines():
        label.set_linewidth(1.5)

    if filename[2] is not None:
        fig.savefig(PATH + filename[2], bbox_inches='tight', pad_inches=0)
        # NOTE(review): the flattened source does not show whether the close
        # was conditional; it is kept with the save here so that an unsaved
        # error figure is still displayed by plt.show() below - confirm.
        plt.close(fig)

    # Particles
    plot_particles(x_tilde, y_tilde, z_tilde, x, y, z, xs_m, wxs, 5, ts,
                   filename[3])
    plot_particles(x_tilde, y_tilde, z_tilde, x, y, z, xs_m, wxs, 15, ts,
                   filename[4])

    # Trajectory of first coordinates
    plot_trajectory(L, t_tot, dt, xs, xs_m, wxs[:, 0], 'x-', filename[5])
    plot_trajectory(L, t_tot, dt, ys, None, wxs[:, 1], 'y-', filename[6])
    plot_trajectory(L, t_tot, dt, zs, None, wxs[:, 2], 'z-', filename[7])

    plt.show()
def test_fetch_robot_wrapper(fixed_base):
    """Exercise the FetchRobot wrapper inside a physics-enabled empty scene.

    The test builds a simulator with a static ground-plane cube and a
    navmesh, loads the Fetch URDF through ``fetch_robot.FetchRobot`` and then
    checks, in order: arm interpolation and motor targets, base placement on
    the navmesh, arm joint getters/setters, gripper open/close/halfway
    states (dynamic and kinematic) and the end-effector queries.

    Arguments:
    fixed_base -- forwarded to FetchRobot; when truthy the base is expected
                  to stay at the snapped navmesh point after simulation,
                  otherwise it is expected to move away from it
    """
    # set this to output test results as video for easy investigation
    produce_debug_video = False
    observations = []
    cfg_settings = examples.settings.default_sim_settings.copy()
    cfg_settings["scene"] = "NONE"
    cfg_settings["enable_physics"] = True

    # loading the physical scene
    hab_cfg = examples.settings.make_cfg(cfg_settings)

    with habitat_sim.Simulator(hab_cfg) as sim:
        obj_template_mgr = sim.get_object_template_manager()
        rigid_obj_mgr = sim.get_rigid_object_manager()

        # setup the camera for debug video (looking at 0,0,0)
        sim.agents[0].scene_node.translation = [0.0, -1.0, 2.0]

        # add a ground plane
        cube_handle = obj_template_mgr.get_template_handles("cubeSolid")[0]
        cube_template_cpy = obj_template_mgr.get_template_by_handle(cube_handle)
        cube_template_cpy.scale = np.array([5.0, 0.2, 5.0])
        obj_template_mgr.register_template(cube_template_cpy)
        ground_plane = rigid_obj_mgr.add_object_by_template_handle(cube_handle)
        ground_plane.translation = [0.0, -0.2, 0.0]
        ground_plane.motion_type = habitat_sim.physics.MotionType.STATIC

        # compute a navmesh on the ground plane
        navmesh_settings = habitat_sim.NavMeshSettings()
        navmesh_settings.set_defaults()
        sim.recompute_navmesh(sim.pathfinder, navmesh_settings, True)
        sim.navmesh_visualization = True

        # add the robot to the world via the wrapper
        robot_path = "data/robots/hab_fetch/robots/hab_fetch.urdf"
        fetch = fetch_robot.FetchRobot(robot_path, sim, fixed_base=fixed_base)
        fetch.reconfigure()
        assert fetch.get_robot_sim_id() == 1  # 0 is the ground plane
        print(fetch.get_link_and_joint_names())
        observations += simulate(sim, 1.0, produce_debug_video)

        # retract the arm
        observations += fetch._interpolate_arm_control(
            [1.2299035787582397, 2.345386505126953],
            [fetch.params.arm_joints[1], fetch.params.arm_joints[3]],
            1,
            30,
            produce_debug_video,
        )

        # ready the arm
        observations += fetch._interpolate_arm_control(
            [-0.45, 0.1],
            [fetch.params.arm_joints[1], fetch.params.arm_joints[3]],
            1,
            30,
            produce_debug_video,
        )

        # setting arm motor positions
        fetch.arm_motor_pos = np.zeros(len(fetch.params.arm_joints))
        observations += simulate(sim, 1.0, produce_debug_video)

        # set base ground position from navmesh
        # NOTE: because the navmesh floats above the collision geometry we should see a pop/settle with dynamics and no fixed base
        target_base_pos = sim.pathfinder.snap_point(fetch.sim_obj.translation)
        fetch.base_pos = target_base_pos
        # the setter must take effect immediately, before any simulation step
        assert fetch.base_pos == target_base_pos
        observations += simulate(sim, 1.0, produce_debug_video)
        if fixed_base:
            assert np.allclose(fetch.base_pos, target_base_pos)
        else:
            assert not np.allclose(fetch.base_pos, target_base_pos)

        # arm joint queries and setters
        print(f" Arm joint velocities = {fetch.arm_velocity}")
        fetch.arm_joint_pos = np.ones(len(fetch.params.arm_joints))
        fetch.arm_motor_pos = np.ones(len(fetch.params.arm_joints))
        print(f" Arm joint positions (should be ones) = {fetch.arm_joint_pos}")
        print(f" Arm joint limits = {fetch.arm_joint_limits}")
        # round-trip the motor targets through the getter and the setter
        fetch.arm_motor_pos = fetch.arm_motor_pos
        observations += simulate(sim, 1.0, produce_debug_video)

        # test gripper state
        fetch.open_gripper()
        observations += simulate(sim, 1.0, produce_debug_video)
        assert fetch.is_gripper_open
        assert not fetch.is_gripper_closed
        fetch.close_gripper()
        observations += simulate(sim, 1.0, produce_debug_video)
        assert fetch.is_gripper_closed
        assert not fetch.is_gripper_open

        # halfway open
        fetch.set_gripper_target_state(0.5)
        observations += simulate(sim, 0.5, produce_debug_video)
        assert not fetch.is_gripper_open
        assert not fetch.is_gripper_closed

        # kinematic open/close (checked before simulation)
        fetch.gripper_joint_pos = fetch.params.gripper_open_state
        assert np.allclose(fetch.gripper_joint_pos, fetch.params.gripper_open_state)
        assert fetch.is_gripper_open
        observations += simulate(sim, 0.2, produce_debug_video)
        fetch.gripper_joint_pos = fetch.params.gripper_closed_state
        assert fetch.is_gripper_closed
        observations += simulate(sim, 0.2, produce_debug_video)

        # end effector queries
        print(f" End effector link id = {fetch.ee_link_id}")
        print(f" End effector local offset = {fetch.ee_local_offset}")
        print(f" End effector transform = {fetch.ee_transform}")
        print(
            f" End effector translation (at current state) = {fetch.calculate_ee_forward_kinematics(fetch.sim_obj.joint_positions)}"
        )
        invalid_ef_target = np.array([100.0, 200.0, 300.0])
        print(
            f" Clip end effector target ({invalid_ef_target}) to reach = {fetch.clip_ee_to_workspace(invalid_ef_target)}"
        )

        # produce some test debug video
        # (observations is only consumed here, so it is unused unless
        # produce_debug_video is switched on above)
        if produce_debug_video:
            from habitat_sim.utils import viz_utils as vut

            vut.make_video(
                observations,
                "color_sensor",
                "color",
                "test_fetch_robot_wrapper__fixed_base=" + str(fixed_base),
                open_vid=True,
            )
def updateAll(self):
    """Run one round: resample a model, derive exploration strategies from
    it, simulate with them and adopt them as the current strategies.

    Steps, in order:
    1. Increment ``self.round`` and draw one (transition, reward) sample
       from ``game.resample()``.
    2. For each owner, compute a deterministic exploration strategy against
       the opponent's average strategy (``getExploreStgy``).
    3. Call the outer ``simulate`` twice with the exploration profile.
    4. Copy the exploration strategies into ``self.stgy`` (``updStgy``).
    5. Add the current strategies, weighted by the owner's own reach
       probability, to ``self.sumstgy`` (``updSumstgy``).
    """
    game = self.game
    self.round += 1
    transpsrl, rewpsrl = game.resample()
    avgstgy = self.avgstgyprofile()

    def getExploreStgy(owner, iset, explore_stgy, oppstgy, ds_c):
        # Recursively walk owner's information-set tree from `iset`.
        # ds_c = (rew, trans, reachp); rew and reachp are indexed by history
        # and are modified in place while walking.
        rew, trans, reachp = ds_c
        hists = game.Iset2Hists[owner][iset]
        if game.isTerminal[hists[0]] == True:
            return
        player = game.playerOfIset[owner][iset]
        if player == owner:
            # Owner acts here: score each action by the reach-weighted reward
            # of its successor histories and pick the best one.
            nacts = game.nactsOnIset[owner][iset]
            outcome = np.zeros(nacts)
            for a in range(nacts):
                getExploreStgy(owner, game.isetSucc[owner][iset][a],
                               explore_stgy, oppstgy, ds_c)
                for h in hists:
                    outcome[a] += reachp[h] * rew[game.histSucc[h][a]][owner]
            a_star = np.argmax(outcome)
            # one-hot strategy on the selected action
            _stgy = np.zeros(nacts)
            _stgy[a_star] = 1
            explore_stgy[iset] = _stgy
            # back up the reward of the selected action to every history
            for h in hists:
                rew[h] = rew[game.histSucc[h][a_star]]
        else:
            # Someone else acts here: player 2 follows the sampled
            # transition table `trans`, any other player follows `oppstgy`.
            truenacts = game.nactsOnHist[hists[0]]  # not used below
            obsnacts = game.nactsOnIset[owner][iset]
            # 1) push reach probabilities down to the successor histories
            for h in hists:
                _stgy = None
                if player == 2:
                    _stgy = trans[h]
                else:
                    piset = game.Hist2Iset[player][h]
                    _stgy = oppstgy[piset]
                nactsh = game.nactsOnHist[h]
                for a in range(nactsh):
                    reachp[game.histSucc[h][a]] = reachp[h] * _stgy[a]
            # 2) solve the successor information sets
            for a in range(obsnacts):
                getExploreStgy(owner, game.isetSucc[owner][iset][a],
                               explore_stgy, oppstgy, ds_c)
            # 3) accumulate the strategy-weighted successor rewards
            for h in hists:
                _stgy = None
                if player == 2:
                    _stgy = trans[h]
                else:
                    piset = game.Hist2Iset[player][h]
                    _stgy = oppstgy[piset]
                nactsh = game.nactsOnHist[h]
                for a in range(nactsh):
                    rew[h] += rew[game.histSucc[h][a]] * _stgy[a]

    prob = np.ones(game.numHists)
    # Start from a uniform strategy on every information set where the owner
    # acts and an empty array everywhere else.
    explore_stgy = [[], []]
    for i, iset in enumerate(range(game.numIsets[0])):
        nact = game.nactsOnIset[0][iset]
        if game.playerOfIset[0][iset] == 0:
            explore_stgy[0].append(np.ones(nact) / nact)
        else:
            explore_stgy[0].append(np.ones(0))
    for i, iset in enumerate(range(game.numIsets[1])):
        nact = game.nactsOnIset[1][iset]
        if game.playerOfIset[1][iset] == 1:
            explore_stgy[1].append(np.ones(nact) / nact)
        else:
            explore_stgy[1].append(np.ones(0))

    # NOTE(review): both calls share the same rewpsrl / prob objects, and
    # getExploreStgy writes to them (`rew[h] = ...`, `rew[h] += ...`,
    # `reachp[...] = ...`). The second call therefore starts from tables
    # already modified by the first - confirm this is intended.
    getExploreStgy(0, 0, explore_stgy[0], avgstgy[1],
                   (rewpsrl, transpsrl, prob))
    getExploreStgy(1, 0, explore_stgy[1], avgstgy[0],
                   (rewpsrl, transpsrl, prob))

    # `simulate` is resolved outside this method (three-argument form).
    simulate(game, 0, explore_stgy)
    simulate(game, 0, explore_stgy)

    def updStgy(owner, iset, expstgy):
        # Copy the exploration strategy into self.stgy wherever owner acts,
        # then continue with every successor information set.
        player = game.playerOfIset[owner][iset]
        if player == owner:
            self.stgy[owner][iset] = expstgy[owner][iset].copy()
        for nxtiset in game.isetSucc[owner][iset]:
            updStgy(owner, nxtiset, expstgy)

    updStgy(0, 0, explore_stgy)
    updStgy(1, 0, explore_stgy)

    def updSumstgy(owner, iset, prob=1.0):
        # Add the current strategy, weighted by owner's own reach
        # probability `prob`, to the running sum.
        player = game.playerOfIset[owner][iset]
        if player == owner:
            self.sumstgy[owner][iset] += prob * self.stgy[player][iset]
            for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                # skip subtrees the owner (almost) never reaches
                if prob * self.stgy[player][iset][aid] > 1e-8:
                    updSumstgy(owner, nxtiset,
                               prob * self.stgy[player][iset][aid])
        else:
            for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                updSumstgy(owner, nxtiset, prob)

    updSumstgy(0, 0)
    updSumstgy(1, 0)
def updateAll(self):
    """Run one round of trajectory sampling, replayed Q updates and strategy
    refresh.

    Steps, in order:
    1. Increment ``self.round`` and derive the learning rate and the
       temperature from it.
    2. Sample two playouts from history 0 under ``self.stgy`` (player 2
       draws from ``game.chanceprob``) and append each player's
       information-set view of them to ``self.trajs``.
    3. Thirty times, for each player, replay one stored trajectory drawn
       uniformly at random and update ``self.Q`` along it.
    4. Call ``self.genStgy(temperature)``.
    5. Add the current strategies, weighted by the owner's own reach
       probability, to ``self.sumstgy``.

    Fix: a non-final step now bootstraps from the best value of the NEXT
    information set (``niset``). It used to read the maximum of the current
    information set, so the final reward never reached earlier decisions.
    """
    game = self.game
    self.round += 1
    learningrate = 0.05 / (1.0 + 0.003 * np.sqrt(1.0 * self.round))
    temperature = (1.0 + 0.02 * np.sqrt(1.0 * self.round))

    def simulate(h, histtraj):
        # simulate strategies and add data to replay memory
        # Each step is stored as (history, action, next history, reward);
        # only the step reaching a terminal history carries a reward.
        player = game.playerOfHist[h]
        if player == 2:
            curstgy = game.chanceprob[h]
        else:
            iset = game.Hist2Iset[player][h]
            curstgy = self.stgy[player][iset]
        a = np.random.choice(game.nactsOnHist[h], p=curstgy)
        nxth = game.histSucc[h][a]
        if game.isTerminal[nxth]:
            r = game.simulate(nxth)
            histtraj.append((h, a, nxth, r[0]))
        else:
            histtraj.append((h, a, nxth, 0.0))
            simulate(nxth, histtraj)

    def translate_traj(owner, histtraj, isettraj):
        # Project a history trajectory onto owner's own decisions, appending
        # (iset, action, next iset, reward) tuples to `isettraj`. The last
        # decision gets next iset -1 and the reward collected from there on.
        ids = [
            i for i, step in enumerate(histtraj)
            if game.playerOfHist[step[0]] == owner
        ]
        if not ids:
            return []
        last = len(ids) - 1
        for i, inds in enumerate(ids):
            h = histtraj[inds][0]
            a = histtraj[inds][1]
            iset = game.Hist2Iset[owner][h]
            if i == last:
                rews = 0.0
                for step in histtraj[inds:]:
                    rews += step[3]
                # stored rewards are player 0's; player 1 sees the negation
                if owner == 1:
                    rews *= -1
                isettraj.append((iset, a, -1, rews))
            else:
                nh = histtraj[ids[i + 1]][0]
                niset = game.Hist2Iset[owner][nh]
                isettraj.append((iset, a, niset, 0.0))

    for _ in range(2):
        histtraj = []
        simulate(0, histtraj)
        isettrajs = [[], []]
        translate_traj(0, histtraj, isettrajs[0])
        translate_traj(1, histtraj, isettrajs[1])
        self.trajs[0].append(isettrajs[0])
        self.trajs[1].append(isettrajs[1])

    def updQtraj(owner, isettraj, lr):
        # Move each visited Q entry towards its target with step size lr.
        for iset, a, niset, rew in isettraj:
            if niset == -1:
                # final decision of the trajectory: target is the reward
                target = rew
            else:
                # intermediate decision: target is the best value of the
                # information set reached next
                target = self.Q[owner][niset].max()
            self.Q[owner][iset][a] = (
                1.0 - lr) * self.Q[owner][iset][a] + lr * target

    for k in range(30):
        for p in range(2):
            trajid = np.random.randint(0, len(self.trajs[p]))
            updQtraj(p, self.trajs[p][trajid], learningrate)

    self.genStgy(temperature)

    def updSumstgy(owner, iset, prob=1.0):
        # Add the current strategy, weighted by owner's own reach
        # probability `prob`, to the running sum.
        player = game.playerOfIset[owner][iset]
        if player == owner:
            self.sumstgy[owner][iset] += prob * self.stgy[player][iset]
            for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                # skip subtrees the owner (almost) never reaches
                if prob * self.stgy[player][iset][aid] > 1e-8:
                    updSumstgy(owner, nxtiset,
                               prob * self.stgy[player][iset][aid])
        else:
            for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                updSumstgy(owner, nxtiset, prob)

    updSumstgy(0, 0)
    updSumstgy(1, 0)
def updateAll(self):
    """Run one round: CFR update on a resampled model, strategy bookkeeping,
    then data collection according to ``self.Type``.

    Steps, in order:
    1. Increment ``self.round`` and draw two independent samples from
       ``game.resample()`` (one for the update, one for validation).
    2. Fold the first sample into the running sums ``self.sampledtrans1``,
       ``self.sampledrews1`` and ``self.weight1`` (``avgchance``).
    3. Call ``self.update`` for both players on the first sample.
    4. Refresh ``self.stgy`` from the solvers touched this round
       (``updStgy``) and add it to ``self.sumstgy`` (``updSumstgy``).
    5. Depending on ``self.Type`` ("default", "br_dirc", "ordinary" or
       "random"), build exploration strategies and call the outer
       ``simulate`` with them.
    """
    game = self.game
    self.round += 1
    transpsrl, rewpsrl = game.resample()
    transvalidation, rewvalidation = game.resample()

    def avgchance(h, curtrans, currew, w, sumtrans, sumrew, sumw):
        # Walk the history tree from h, adding the sampled rewards and
        # transitions to the running sums with weight w. w is multiplied by
        # the transition probability only below player-2 histories.
        sumw[h] += w
        term = game.isTerminal[h]
        player = game.playerOfHist[h]
        if term == True:
            sumrew[h] += (w * currew[h][0], w * currew[h][1])
            return
        if player == 2:
            for a in range(game.nactsOnHist[h]):
                avgchance(game.histSucc[h][a], curtrans, currew,
                          w * curtrans[h][a], sumtrans, sumrew, sumw)
            sumtrans[h] += w * curtrans[h]
        else:
            for a in range(game.nactsOnHist[h]):
                avgchance(game.histSucc[h][a], curtrans, currew, w, sumtrans,
                          sumrew, sumw)

    avgchance(0, transpsrl, rewpsrl, 1.0, self.sampledtrans1,
              self.sampledrews1, self.weight1)

    self.update(0, 0, [np.ones(1), np.ones(1)], [0], rewpsrl,
                transpsrl)  #the CFR algorithm
    self.update(1, 0, [np.ones(1), np.ones(1)], [0], rewpsrl,
                transpsrl)  #the CFR algorithm

    def updStgy(owner, iset):
        # Only information sets flagged with the current round are visited;
        # an unflagged one cuts off its whole subtree.
        if self.isetflag[owner][iset] != self.round:
            return
        player = game.playerOfIset[owner][iset]
        if player == owner:
            self.stgy[owner][iset] = self.solvers[owner][
                iset].curstgy.copy()
        for nxtiset in game.isetSucc[owner][iset]:
            updStgy(owner, nxtiset)

    updStgy(0, 0)
    updStgy(1, 0)

    def updSumstgy(owner, iset, prob=1.0):
        # Add the current strategy, weighted by owner's own reach
        # probability `prob`, to the running sum.
        player = game.playerOfIset[owner][iset]
        if player == owner:
            self.sumstgy[owner][iset] += prob * self.stgy[player][iset]
            for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                # skip subtrees the owner (almost) never reaches
                if prob * self.stgy[player][iset][aid] > 1e-8:
                    updSumstgy(owner, nxtiset,
                               prob * self.stgy[player][iset][aid])
        else:
            for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                updSumstgy(owner, nxtiset, prob)

    updSumstgy(0, 0)
    updSumstgy(1, 0)

    avgstgy = self.avgstgyprofile()

    def getExploreStgy(owner, iset, explore_stgy, oppstgy, ds_c1, ds_c2):
        # Recursively walk owner's information-set tree from `iset`, carrying
        # two models side by side. Each ds_c = (rew, trans, reachp); rew and
        # reachp are indexed by history and are modified in place.
        rew1, trans1, reachp1 = ds_c1
        rew2, trans2, reachp2 = ds_c2
        hists = game.Iset2Hists[owner][iset]
        if game.isTerminal[hists[0]] == True:
            return
        player = game.playerOfIset[owner][iset]
        if player == owner:
            # Owner acts here: pick the action whose reach-weighted reward
            # under model 1 exceeds the one under model 2 by the most.
            nacts = game.nactsOnIset[owner][iset]
            outcome1 = np.zeros(nacts)
            outcome2 = np.zeros(nacts)
            for a in range(nacts):
                getExploreStgy(owner, game.isetSucc[owner][iset][a],
                               explore_stgy, oppstgy, ds_c1, ds_c2)
                for h in hists:
                    outcome1[a] += reachp1[h] * rew1[game.histSucc[h][a]][owner]
                    outcome2[a] += reachp2[h] * rew2[game.histSucc[h][a]][owner]
            a_star = np.argmax(outcome1 - outcome2)
            # one-hot strategy on the selected action
            _stgy = np.zeros(nacts)
            _stgy[a_star] = 1
            explore_stgy[iset] = _stgy
            # back up the reward of the selected action in both models
            for h in hists:
                rew1[h] = rew1[game.histSucc[h][a_star]]
                rew2[h] = rew2[game.histSucc[h][a_star]]
        else:
            # Someone else acts here: player 2 follows each model's own
            # transition table, any other player follows `oppstgy` in both.
            truenacts = game.nactsOnHist[hists[0]]  # not used below
            obsnacts = game.nactsOnIset[owner][iset]
            # 1) push reach probabilities down to the successor histories
            for h in hists:
                _stgy1 = None
                _stgy2 = None
                if player == 2:
                    _stgy1 = trans1[h]
                    _stgy2 = trans2[h]
                else:
                    piset = game.Hist2Iset[player][h]
                    _stgy1 = oppstgy[piset]
                    _stgy2 = oppstgy[piset]
                nactsh = game.nactsOnHist[h]
                for a in range(nactsh):
                    reachp1[game.histSucc[h][a]] = reachp1[h] * _stgy1[a]
                    reachp2[game.histSucc[h][a]] = reachp2[h] * _stgy2[a]
            # 2) solve the successor information sets
            for a in range(obsnacts):
                getExploreStgy(owner, game.isetSucc[owner][iset][a],
                               explore_stgy, oppstgy, ds_c1, ds_c2)
            # 3) accumulate the strategy-weighted successor rewards
            for h in hists:
                _stgy1 = None
                _stgy2 = None
                if player == 2:
                    _stgy1 = trans1[h]
                    _stgy2 = trans2[h]
                else:
                    piset = game.Hist2Iset[player][h]
                    _stgy1 = oppstgy[piset]
                    _stgy2 = oppstgy[piset]
                nactsh = game.nactsOnHist[h]
                for a in range(nactsh):
                    rew1[h] += rew1[game.histSucc[h][a]] * _stgy1[a]
                    rew2[h] += rew2[game.histSucc[h][a]] * _stgy2[a]
        # leftover debugging hook for the root information set
        if iset == 0:
            pass
            #print("check", rew1[0] - rew2[0], rew1[0], rew2[0])

    #avgchrew = np.zeros(game.numHists).tolist()
    #def getavgchance(h, avgchrew, avgchtrans):
    #getavgchance(0, avgchrew, avgchtrans)

    prob1 = np.ones(game.numHists)
    prob2 = np.ones(game.numHists)
    # Start from a uniform strategy on every information set where the owner
    # acts and an empty array everywhere else.
    explore_stgy = [[], []]
    for i, iset in enumerate(range(game.numIsets[0])):
        nact = game.nactsOnIset[0][iset]
        if game.playerOfIset[0][iset] == 0:
            explore_stgy[0].append(np.ones(nact) / nact)
        else:
            explore_stgy[0].append(np.ones(0))
    for i, iset in enumerate(range(game.numIsets[1])):
        nact = game.nactsOnIset[1][iset]
        if game.playerOfIset[1][iset] == 1:
            explore_stgy[1].append(np.ones(nact) / nact)
        else:
            explore_stgy[1].append(np.ones(0))

    # Turn the running sums into averages on private copies: terminal
    # rewards and player-2 transitions are divided by the accumulated weight.
    # NOTE(review): weight1[h] is used as a divisor without a zero check -
    # confirm every such history has been reached by avgchance.
    avgchtrans = copy.deepcopy(self.sampledtrans1)
    avgchrew = copy.deepcopy(self.sampledrews1)
    for h in range(game.numHists):
        if game.isTerminal[h]:
            avgchrew[h] /= self.weight1[h]
        if game.playerOfHist[h] == 2:
            avgchtrans[h] /= self.weight1[h]

    # NOTE(review): within each branch below, both getExploreStgy calls
    # share rewvalidation / prob2 / prob1 (and avgchrew in "default"), which
    # getExploreStgy modifies in place; the second call starts from tables
    # already changed by the first - confirm this is intended.
    if self.Type == "default":
        # validation sample versus the averaged model
        getExploreStgy(0, 0, explore_stgy[0], avgstgy[1],
                       (rewvalidation, transvalidation, prob2),
                       (avgchrew, avgchtrans, prob1))
        getExploreStgy(1, 0, explore_stgy[1], avgstgy[0],
                       (rewvalidation, transvalidation, prob2),
                       (avgchrew, avgchtrans, prob1))
        for t in range(1):
            # one player explores while the other plays its current strategy
            simulate(game, 0, [explore_stgy[0], self.stgy[1]])
            simulate(game, 0, [self.stgy[0], explore_stgy[1]])
    if self.Type == "br_dirc":
        # same as "default" but with the averaged rewards zeroed out, so only
        # the validation sample drives the action choice
        getExploreStgy(0, 0, explore_stgy[0], avgstgy[1],
                       (rewvalidation, transvalidation, prob2),
                       (avgchrew * 0.0, avgchtrans, prob1))
        getExploreStgy(1, 0, explore_stgy[1], avgstgy[0],
                       (rewvalidation, transvalidation, prob2),
                       (avgchrew * 0.0, avgchtrans, prob1))
        for t in range(1):
            simulate(game, 0, [explore_stgy[0], self.stgy[1]])
            simulate(game, 0, [self.stgy[0], explore_stgy[1]])
    if self.Type == "ordinary":
        # no exploration: both players use their current strategies
        simulate(game, 0, self.stgy)
        simulate(game, 0, self.stgy)
    if self.Type == "random":
        # uniform strategies wherever the owner acts
        for i in range(2):
            for iset in range(game.numIsets[i]):
                pl = game.playerOfIset[i][iset]
                # explore_stgy[i] was already filled above, so this append
                # only lengthens the list; the entry at `iset` is what counts
                explore_stgy[i].append(0)
                if pl == i:
                    nacts = game.nactsOnIset[i][iset]
                    explore_stgy[i][iset] = np.ones(nacts) / nacts
        simulate(game, 0, explore_stgy)
        simulate(game, 0, explore_stgy)