Code example #1
def plot_3d_trajectory(export=False, filename='q2-3d-trajectory.pdf'):
    """Simulate the 3d-system for 50 seconds and make a 3D plot of the
    trajectory.

    Arguments:
    export -- indicate if the plot should be PDF exported and saved (default
    False)
    filename -- exported PDF filename (default q2-3d-trajectory.pdf)
    """
    xs, ys, zs = simulate(a=10,
                          r=28,
                          b=8 / 3,
                          mu_0=(1, 1, 1),
                          sigma_0=math.sqrt(0.001),
                          dt=0.001,
                          sigma_u=math.sqrt(0.0000001),
                          Gamma=np.eye(3),
                          t_tot=50)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.scatter(xs[0], ys[0], zs[0], color='g')
    ax.plot(xs, ys, zs)
    plt.show()

    if export:
        fig.savefig(PATH + filename, bbox_inches='tight', pad_inches=0)
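The simulate helper called here is not reproduced on this page. Its arguments (a=10, r=28, b=8/3 are the classic Lorenz coefficients, plus a noise scale sigma_u and a diffusion matrix Gamma) suggest an Euler-Maruyama integration of a Lorenz system with additive Gaussian process noise; the following is only a minimal sketch under that assumption, not the project's actual implementation.

# Sketch (assumption): Euler-Maruyama integration of a noisy Lorenz system,
# approximating what simulate() presumably does.
import math
import numpy as np

def simulate_sketch(a, r, b, mu_0, sigma_0, dt, sigma_u, Gamma, t_tot):
    n = int(t_tot / dt) + 1
    x = np.empty((n, 3))
    x[0] = np.asarray(mu_0) + sigma_0 * np.random.randn(3)  # noisy initial state
    for k in range(n - 1):
        xk, yk, zk = x[k]
        drift = np.array([a * (yk - xk),
                          xk * (r - zk) - yk,
                          xk * yk - b * zk])
        noise = Gamma @ (sigma_u * math.sqrt(dt) * np.random.randn(3))
        x[k + 1] = x[k] + dt * drift + noise
    return x[:, 0], x[:, 1], x[:, 2]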
Code example #2
File: models.py Project: vishalbelsare/Causing
def example():
    """model example"""

    X1, X2, Y1, Y2, Y3 = symbols(["X1", "X2", "Y1", "Y2", "Y3"])

    def define_equations(X1, X2):

        eq_Y1 = X1
        eq_Y2 = X2 + 2 * Y1**2
        eq_Y3 = Y1 + Y2

        return eq_Y1, eq_Y2, eq_Y3

    model_dat = {
        "define_equations": define_equations,  # equations in topological order
        "xvars": [X1, X2],  # exogenous variables in desired order
        "yvars": [Y1, Y2, Y3],  # endogenous variables in topological order
        "ymvars": [Y3],  # manifest endogenous variables
        "final_var": Y3,  # final variable of interest, for mediation analysis
        "show_nr_indiv": 3,  # show first individual effects
        "estimate_bias":
        True,  # estimate equation biases, for model validation
        "alpha": None,  # regularization parameter, is estimated if None
        "dof": None,  # effective degrees of freedom, corresponding to alpha
        "dir_path": "output/",  # output directory path
    }

    # simulate data
    import utils
    simulation_dat = {
        "xmean_true": [3, 2],  # mean of exogeneous data
        "sigx_theo": 1,  # true scalar error variance of xvars
        "sigym_theo": 1,  # true scalar error variance of ymvars
        "rho": 0.2,  # true correlation within y and within x vars
        "tau": 200,  # nr. of simulated observations
    }
    model_dat.update(simulation_dat)
    xdat, ymdat = utils.simulate(model_dat)

    # save data
    # =============================================================================
    #     from numpy import savetxt
    #     savetxt("data/xdat.csv", xdat, delimiter=",")
    #     savetxt("data/ymdat.csv", ymdat, delimiter=",")
    # =============================================================================

    # load data
    # =============================================================================
    #     from numpy import loadtxt
    #     xdat = loadtxt("data/xdat.csv", delimiter=",").reshape(len(model_dat["xvars"]), -1)
    #     ymdat = loadtxt("data/ymdat.csv", delimiter=",").reshape(len(model_dat["ymvars"]), -1)
    # =============================================================================

    model_dat["xdat"] = xdat  # exogenous data
    model_dat["ymdat"] = ymdat  # manifest endogenous data

    return model_dat
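In the dictionary above, define_equations returns SymPy expressions in topological order, so each endogenous variable can be computed from the exogenous inputs and the previously computed endogenous variables. The short standalone check below (not part of the Causing package) just evaluates the example's equations numerically at the simulated means to make that ordering concrete.

# Standalone illustration (not Causing code): evaluate the example equations
# in topological order with sympy.lambdify.
from sympy import symbols, lambdify

X1, X2, Y1, Y2, Y3 = symbols(["X1", "X2", "Y1", "Y2", "Y3"])
eq_Y1, eq_Y2, eq_Y3 = X1, X2 + 2 * Y1**2, Y1 + Y2

f_Y1 = lambdify((X1,), eq_Y1)
f_Y2 = lambdify((X2, Y1), eq_Y2)
f_Y3 = lambdify((Y1, Y2), eq_Y3)

y1 = f_Y1(3)        # X1 at its simulated mean xmean_true[0] = 3
y2 = f_Y2(2, y1)    # X2 at its simulated mean xmean_true[1] = 2
y3 = f_Y3(y1, y2)   # final variable of interest
print(y1, y2, y3)   # 3, 20, 23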
Code example #3
def simulate(
        h,
        histtraj):  # simulate strategies and add data to replay memory
    player = game.playerOfHist[h]
    curstgy = None
    if player == 2:
        curstgy = game.chanceprob[h]
    else:
        iset = game.Hist2Iset[player][h]
        curstgy = self.stgy[player][iset]
    a = np.random.choice(game.nactsOnHist[h], p=curstgy)
    nxth = game.histSucc[h][a]
    if game.isTerminal[nxth]:
        r = game.simulate(nxth)
        histtraj.append((h, a, nxth, r[0]))
    else:
        histtraj.append((h, a, nxth, 0.0))
        simulate(nxth, histtraj)
Code example #4
def plot_ekf(filename):
    t_tot = 16
    ts = 0.01
    dt = 0.001
    L = int(ts / dt)
    mu_0 = 1
    sigma_0 = math.sqrt(0.001)
    sigma_u = math.sqrt(0.01)
    sigma_m = math.sqrt(1)
    a, r, b = 10, 28, 8 / 3
    Gamma = np.eye(3)

    xs, ys, zs = simulate(t_tot, mu_0, sigma_0, a, r, b, dt, sigma_u, Gamma)
    xs_m = measure(xs, L, sigma_m)

    mu, cov = ekf(a, r, b, dt, sigma_u, Gamma, mu_0, sigma_0, ts, t_tot, xs_m,
                  sigma_m)

    print(cov[int(5 / ts)][0, 0])

    plot_trajectory(L, t_tot, dt, xs, xs_m, mu[:, 0], 'x', filename[0])
    plot_trajectory(L, t_tot, dt, ys, None, mu[:, 1], 'y', filename[1])
    plot_trajectory(L, t_tot, dt, zs, None, mu[:, 2], 'z', filename[2])

    # Error function
    fig, ax = plt.subplots()
    x_real = np.empty((int(t_tot / ts) + 1, 3))
    x_real[:, 0] = xs[::L]
    x_real[:, 1] = ys[::L]
    x_real[:, 2] = zs[::L]

    a = np.arange(0, int(t_tot / dt) + 1, 1)
    err = np.linalg.norm(x_real - mu, axis=1)
    plt.plot(a[::L], err, 'b', label="Global error")
    plt.axhline(np.mean(err),
                color='b',
                linestyle='dashed',
                label="Mean global error")
    err_x = np.abs(x_real[:, 0] - mu[:, 0])
    plt.axhline(np.mean(err_x),
                color='g',
                linestyle='dashed',
                label="Mean error on x")
    plt.ylim(0, 6)

    legend = ax.legend(loc='upper right')
    for label in legend.get_texts():
        label.set_fontsize('large')

    for label in legend.get_lines():
        label.set_linewidth(1.5)

    if filename[3] is not None:
        fig.savefig(PATH + filename[3], bbox_inches='tight', pad_inches=0)

    plt.show()
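Like simulate, the measure helper is not shown. From its call sites (measure(xs, L, sigma_m)) it presumably keeps every L-th sample of the first coordinate and adds Gaussian measurement noise with standard deviation sigma_m; a short sketch under that assumption:

# Assumed behaviour of measure() (illustration only): subsample every L-th
# value and corrupt it with i.i.d. Gaussian noise of standard deviation sigma_m.
import numpy as np

def measure_sketch(xs, L, sigma_m):
    xs_sub = np.asarray(xs)[::L]
    return xs_sub + sigma_m * np.random.randn(len(xs_sub))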
Code example #5
File: models.py Project: vishalbelsare/Causing
def example3():
    """model example 3
    
    difficult to estimate:
    if just Y3 is manifest, huge regularization is required and direct effects are strongly biased,
    (if all yvars are manifest, just slight regularization is required and some standard errors are huge)
    """

    X1, Y1, Y2, Y3 = symbols(["X1", "Y1", "Y2", "Y3"])

    def define_equations(X1):

        eq_Y1 = 2 * X1
        eq_Y2 = -X1
        eq_Y3 = Y1 + Y2

        return eq_Y1, eq_Y2, eq_Y3

    model_dat = {
        "define_equations": define_equations,
        "xvars": [X1],
        "yvars": [Y1, Y2, Y3],
        "ymvars": [Y3],
        "final_var": Y3,
        "show_nr_indiv": 3,
        "estimate_bias": True,
        "alpha": None,
        "dof": None,
        "dir_path": "output/",
    }

    # simulate data
    import utils
    simulation_dat = {
        "xmean_true": [3],
        "sigx_theo": 1,
        "sigym_theo": 1,
        "rho": 0.2,
        "tau": 200,
    }
    model_dat.update(simulation_dat)
    xdat, ymdat = utils.simulate(model_dat)

    model_dat["xdat"] = xdat
    model_dat["ymdat"] = ymdat

    return model_dat
Code example #6
File: models.py Project: vishalbelsare/Causing
def example2():
    """model example 2, no regularization required, no latent variables"""

    X1, Y1 = symbols(["X1", "Y1"])

    def define_equations(X1):

        eq_Y1 = X1

        return [eq_Y1]

    model_dat = {
        "define_equations": define_equations,
        "xvars": [X1],
        "yvars": [Y1],
        "ymvars": [Y1],
        "final_var": Y1,
        "show_nr_indiv": 3,
        "estimate_bias": True,
        "alpha": None,
        "dof": None,
        "dir_path": "output/",
    }

    # simulate data
    import utils
    simulation_dat = {
        "xmean_true": [3],
        "sigx_theo": 1,
        "sigym_theo": 1,
        "rho": 0.2,
        "tau": 200,
    }
    model_dat.update(simulation_dat)
    xdat, ymdat = utils.simulate(model_dat)

    model_dat["xdat"] = xdat
    model_dat["ymdat"] = ymdat

    return model_dat
Code example #7
def plot_mes_vs_real(export=False, filename='q2-mes-vs-real.pdf'):
    """Compare noisy measurements of the first coordinate of the particle
    positions with the actual simulated first coordinate.

    Arguments:
    export -- whether to export and save the plot as a PDF (default False)
    filename -- exported PDF filename (default q2-mes-vs-real.pdf)
    """
    t_tot = 50
    dt = 0.001
    ts = 0.01
    L = int(ts / dt)
    xs, ys, zs = simulate(a=10,
                          r=28,
                          b=8 / 3,
                          mu_0=(1, 1, 1),
                          sigma_0=math.sqrt(0.001),
                          dt=dt,
                          sigma_u=math.sqrt(0.0000001),
                          Gamma=np.eye(3),
                          t_tot=t_tot)
    xs_m = measure(xs, L=L, sigma_m=1)

    fig, ax = plt.subplots()
    a = np.arange(0, int(t_tot / dt) + 1, 1)
    ax.plot(a, xs, 'b', label='First coordinate trajectory')
    ax.plot(a[:-1:L], xs_m, 'g.', label='Noisy measurements', markersize=4.0)
    legend = ax.legend(loc='upper right')

    for label in legend.get_texts():
        label.set_fontsize('large')

    for label in legend.get_lines():
        label.set_linewidth(1.5)

    plt.show()

    if export:
        fig.savefig(PATH + filename, bbox_inches='tight', pad_inches=0)
Code example #8
File: q7_dist.py Project: anpar/linma1731-project
def main():
    L = int(ts / dt)
    xs, ys, zs = simulate(
        t_tot,
        mu_0,
        sigma_0,
        a,
        r,
        b,
        dt,
        sigma_u,
        Gamma,
    )
    xs_m = measure(xs, L, sigma_m)

    distribs_ekf = ekf_distribs(xs_m)
    distribs_csmc = csmc_distribs(xs_m)

    for k in range(3):
        C_ekf = np.sort(distribs_ekf[k])
        C_csmc = np.sort(distribs_csmc[k])
        R = np.arange(n) / float(n)

        fig, ax = plt.subplots()
        ax.plot(C_ekf, R, label="{} distrib. from EKF".format(dimensions[k]))
        ax.plot(C_csmc, R, label="{} distrib. from CSMC".format(dimensions[k]))

        legend = ax.legend(loc='upper right')

        for label in legend.get_texts():
            label.set_fontsize('large')

        for label in legend.get_lines():
            label.set_linewidth(1.5)

        #fig.savefig(PATH + "distrib-{}.pdf".format(dimensions[k]),
        #            bbox_inches='tight', pad_inches=0)

        plt.show()
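The plotting idiom above (sorting the samples and plotting them against np.arange(n) / n) draws an empirical cumulative distribution function, which is what allows the EKF and CSMC posteriors to be compared per dimension. A self-contained example of the same idiom:

# Self-contained illustration of the empirical-CDF idiom used above.
import numpy as np
import matplotlib.pyplot as plt

samples = np.random.randn(500)                      # any 1-D sample set
C = np.sort(samples)                                # sorted values (x-axis)
R = np.arange(len(samples)) / float(len(samples))   # normalized ranks (y-axis)

plt.plot(C, R, label="empirical CDF")
plt.legend(loc="upper left")
plt.show()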
Code example #9
def plot_smc(a, r, b, dt, ts, t_tot, mu_0, sigma_0, sigma_u, Gamma, sigma_m, n,
             filename):

    L = int(ts / dt)

    xs, ys, zs = simulate(
        t_tot,
        mu_0,
        sigma_0,
        a,
        r,
        b,
        dt,
        sigma_u,
        Gamma,
    )
    xs_m = measure(xs, L, sigma_m)

    x_tilde, y_tilde, z_tilde, x, y, z, wxs = classical_smc(
        a, r, b, dt, sigma_u, Gamma, mu_0, sigma_0, ts, t_tot, xs_m, sigma_m,
        n)

    # Histograms
    plot_hist(x, x_tilde, xs, dt, ts, filename[0])
    plot_hist(y, y_tilde, ys, dt, ts, filename[1])

    # Error function
    fig, ax = plt.subplots()
    x_real = np.empty((int(t_tot / ts) + 1, 3))
    x_real[:, 0] = xs[::L]
    x_real[:, 1] = ys[::L]
    x_real[:, 2] = zs[::L]

    a = np.arange(0, int(t_tot / dt) + 1, 1)
    err = np.linalg.norm(x_real - wxs, axis=1)
    plt.plot(a[::L], err, 'b', label="Global error")
    plt.axhline(np.mean(err),
                color='b',
                linestyle='dashed',
                label="Mean global error")
    err_x = np.abs(x_real[:, 0] - wxs[:, 0])
    plt.axhline(np.mean(err_x),
                color='g',
                linestyle='dashed',
                label="Mean error on x")
    #plt.ylim(0, 6)

    legend = ax.legend(loc='upper right')
    for label in legend.get_texts():
        label.set_fontsize('large')

    for label in legend.get_lines():
        label.set_linewidth(1.5)

    if filename[2] is not None:
        fig.savefig(PATH + filename[2], bbox_inches='tight', pad_inches=0)

    plt.close(fig)

    # Particles
    plot_particles(x_tilde, y_tilde, z_tilde, x, y, z, xs_m, wxs, 5, ts,
                   filename[3])
    plot_particles(x_tilde, y_tilde, z_tilde, x, y, z, xs_m, wxs, 15, ts,
                   filename[4])

    # Trajectory of first coordinates
    plot_trajectory(L, t_tot, dt, xs, xs_m, wxs[:, 0], 'x-', filename[5])
    plot_trajectory(L, t_tot, dt, ys, None, wxs[:, 1], 'y-', filename[6])
    plot_trajectory(L, t_tot, dt, zs, None, wxs[:, 2], 'z-', filename[7])

    plt.show()
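classical_smc is not reproduced here. The core building block of such a bootstrap particle filter is resampling the particles in proportion to their normalized importance weights; the snippet below is a minimal multinomial-resampling sketch for illustration, not the project's implementation.

# Minimal multinomial resampling (illustration only), the standard step inside
# a bootstrap particle filter such as classical_smc() presumably uses.
import numpy as np

def resample(particles, weights, rng=None):
    rng = rng or np.random.default_rng()
    w = np.asarray(weights, dtype=float)
    w /= w.sum()                                   # normalize the weights
    idx = rng.choice(len(w), size=len(w), p=w)     # draw indices ~ weights
    return np.asarray(particles)[idx]              # equally weighted particle set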
Code example #10
def test_fetch_robot_wrapper(fixed_base):
    # set this to output test results as video for easy investigation
    produce_debug_video = False
    observations = []
    cfg_settings = examples.settings.default_sim_settings.copy()
    cfg_settings["scene"] = "NONE"
    cfg_settings["enable_physics"] = True

    # loading the physical scene
    hab_cfg = examples.settings.make_cfg(cfg_settings)

    with habitat_sim.Simulator(hab_cfg) as sim:
        obj_template_mgr = sim.get_object_template_manager()
        rigid_obj_mgr = sim.get_rigid_object_manager()

        # setup the camera for debug video (looking at 0,0,0)
        sim.agents[0].scene_node.translation = [0.0, -1.0, 2.0]

        # add a ground plane
        cube_handle = obj_template_mgr.get_template_handles("cubeSolid")[0]
        cube_template_cpy = obj_template_mgr.get_template_by_handle(cube_handle)
        cube_template_cpy.scale = np.array([5.0, 0.2, 5.0])
        obj_template_mgr.register_template(cube_template_cpy)
        ground_plane = rigid_obj_mgr.add_object_by_template_handle(cube_handle)
        ground_plane.translation = [0.0, -0.2, 0.0]
        ground_plane.motion_type = habitat_sim.physics.MotionType.STATIC

        # compute a navmesh on the ground plane
        navmesh_settings = habitat_sim.NavMeshSettings()
        navmesh_settings.set_defaults()
        sim.recompute_navmesh(sim.pathfinder, navmesh_settings, True)
        sim.navmesh_visualization = True

        # add the robot to the world via the wrapper
        robot_path = "data/robots/hab_fetch/robots/hab_fetch.urdf"
        fetch = fetch_robot.FetchRobot(robot_path, sim, fixed_base=fixed_base)
        fetch.reconfigure()
        assert fetch.get_robot_sim_id() == 1  # 0 is the ground plane
        print(fetch.get_link_and_joint_names())
        observations += simulate(sim, 1.0, produce_debug_video)

        # retract the arm
        observations += fetch._interpolate_arm_control(
            [1.2299035787582397, 2.345386505126953],
            [fetch.params.arm_joints[1], fetch.params.arm_joints[3]],
            1,
            30,
            produce_debug_video,
        )

        # ready the arm
        observations += fetch._interpolate_arm_control(
            [-0.45, 0.1],
            [fetch.params.arm_joints[1], fetch.params.arm_joints[3]],
            1,
            30,
            produce_debug_video,
        )

        # setting arm motor positions
        fetch.arm_motor_pos = np.zeros(len(fetch.params.arm_joints))
        observations += simulate(sim, 1.0, produce_debug_video)

        # set base ground position from navmesh
        # NOTE: because the navmesh floats above the collision geometry we should see a pop/settle with dynamics and no fixed base
        target_base_pos = sim.pathfinder.snap_point(fetch.sim_obj.translation)
        fetch.base_pos = target_base_pos
        assert fetch.base_pos == target_base_pos
        observations += simulate(sim, 1.0, produce_debug_video)
        if fixed_base:
            assert np.allclose(fetch.base_pos, target_base_pos)
        else:
            assert not np.allclose(fetch.base_pos, target_base_pos)

        # arm joint queries and setters
        print(f" Arm joint velocities = {fetch.arm_velocity}")
        fetch.arm_joint_pos = np.ones(len(fetch.params.arm_joints))
        fetch.arm_motor_pos = np.ones(len(fetch.params.arm_joints))
        print(f" Arm joint positions (should be ones) = {fetch.arm_joint_pos}")
        print(f" Arm joint limits = {fetch.arm_joint_limits}")
        fetch.arm_motor_pos = fetch.arm_motor_pos
        observations += simulate(sim, 1.0, produce_debug_video)

        # test gripper state
        fetch.open_gripper()
        observations += simulate(sim, 1.0, produce_debug_video)
        assert fetch.is_gripper_open
        assert not fetch.is_gripper_closed
        fetch.close_gripper()
        observations += simulate(sim, 1.0, produce_debug_video)
        assert fetch.is_gripper_closed
        assert not fetch.is_gripper_open

        # halfway open
        fetch.set_gripper_target_state(0.5)
        observations += simulate(sim, 0.5, produce_debug_video)
        assert not fetch.is_gripper_open
        assert not fetch.is_gripper_closed

        # kinematic open/close (checked before simulation)
        fetch.gripper_joint_pos = fetch.params.gripper_open_state
        assert np.allclose(fetch.gripper_joint_pos, fetch.params.gripper_open_state)
        assert fetch.is_gripper_open
        observations += simulate(sim, 0.2, produce_debug_video)
        fetch.gripper_joint_pos = fetch.params.gripper_closed_state
        assert fetch.is_gripper_closed
        observations += simulate(sim, 0.2, produce_debug_video)

        # end effector queries
        print(f" End effector link id = {fetch.ee_link_id}")
        print(f" End effector local offset = {fetch.ee_local_offset}")
        print(f" End effector transform = {fetch.ee_transform}")
        print(
            f" End effector translation (at current state) = {fetch.calculate_ee_forward_kinematics(fetch.sim_obj.joint_positions)}"
        )
        invalid_ef_target = np.array([100.0, 200.0, 300.0])
        print(
            f" Clip end effector target ({invalid_ef_target}) to reach = {fetch.clip_ee_to_workspace(invalid_ef_target)}"
        )

        # produce some test debug video
        if produce_debug_video:
            from habitat_sim.utils import viz_utils as vut

            vut.make_video(
                observations,
                "color_sensor",
                "color",
                "test_fetch_robot_wrapper__fixed_base=" + str(fixed_base),
                open_vid=True,
            )
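The simulate(sim, duration, produce_debug_video) helper used throughout this test comes from the surrounding habitat-sim test utilities and is not shown here; a typical version (an assumption about its behavior, not the exact code) steps the physics world at a fixed rate for the requested duration and optionally collects sensor observations.

# Assumed shape of the simulate() helper used above: step physics at 60 Hz for
# `dt` seconds of simulated time and optionally record observations.
def simulate(sim, dt, get_observations=False):
    observations = []
    target_time = sim.get_world_time() + dt
    while sim.get_world_time() < target_time:
        sim.step_physics(1.0 / 60.0)
        if get_observations:
            observations.append(sim.get_sensor_observations())
    return observations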
Code example #11
File: ficplay.py Project: iclrsubmit1234/submit
    def updateAll(self):
        game = self.game

        self.round += 1

        transpsrl, rewpsrl = game.resample()

        avgstgy = self.avgstgyprofile()

        def getExploreStgy(owner, iset, explore_stgy, oppstgy, ds_c):
            rew, trans, reachp = ds_c
            hists = game.Iset2Hists[owner][iset]
            if game.isTerminal[hists[0]] == True:
                return
            player = game.playerOfIset[owner][iset]
            if player == owner:
                nacts = game.nactsOnIset[owner][iset]
                outcome = np.zeros(nacts)
                for a in range(nacts):
                    getExploreStgy(owner, game.isetSucc[owner][iset][a],
                                   explore_stgy, oppstgy, ds_c)

                    for h in hists:
                        outcome[a] += reachp[h] * rew[game.histSucc[h][a]][owner]
                a_star = np.argmax(outcome)
                _stgy = np.zeros(nacts)
                _stgy[a_star] = 1

                explore_stgy[iset] = _stgy

                for h in hists:
                    rew[h] = rew[game.histSucc[h][a_star]]

            else:
                truenacts = game.nactsOnHist[hists[0]]
                obsnacts = game.nactsOnIset[owner][iset]
                for h in hists:
                    _stgy = None
                    if player == 2:
                        _stgy = trans[h]
                    else:
                        piset = game.Hist2Iset[player][h]
                        _stgy = oppstgy[piset]
                    nactsh = game.nactsOnHist[h]
                    for a in range(nactsh):
                        reachp[game.histSucc[h][a]] = reachp[h] * _stgy[a]

                for a in range(obsnacts):
                    getExploreStgy(owner, game.isetSucc[owner][iset][a],
                                   explore_stgy, oppstgy, ds_c)
                for h in hists:
                    _stgy = None
                    if player == 2:
                        _stgy = trans[h]
                    else:
                        piset = game.Hist2Iset[player][h]
                        _stgy = oppstgy[piset]
                    nactsh = game.nactsOnHist[h]
                    for a in range(nactsh):
                        rew[h] += rew[game.histSucc[h][a]] * _stgy[a]

        prob = np.ones(game.numHists)
        explore_stgy = [[], []]

        for i, iset in enumerate(range(game.numIsets[0])):
            nact = game.nactsOnIset[0][iset]
            if game.playerOfIset[0][iset] == 0:
                explore_stgy[0].append(np.ones(nact) / nact)
            else:
                explore_stgy[0].append(np.ones(0))
        for i, iset in enumerate(range(game.numIsets[1])):
            nact = game.nactsOnIset[1][iset]
            if game.playerOfIset[1][iset] == 1:
                explore_stgy[1].append(np.ones(nact) / nact)
            else:
                explore_stgy[1].append(np.ones(0))

        getExploreStgy(0, 0, explore_stgy[0], avgstgy[1],
                       (rewpsrl, transpsrl, prob))
        getExploreStgy(1, 0, explore_stgy[1], avgstgy[0],
                       (rewpsrl, transpsrl, prob))

        simulate(game, 0, explore_stgy)
        simulate(game, 0, explore_stgy)

        def updStgy(owner, iset, expstgy):
            player = game.playerOfIset[owner][iset]
            if player == owner:
                self.stgy[owner][iset] = expstgy[owner][iset].copy()
            for nxtiset in game.isetSucc[owner][iset]:
                updStgy(owner, nxtiset, expstgy)

        updStgy(0, 0, explore_stgy)
        updStgy(1, 0, explore_stgy)

        def updSumstgy(owner, iset, prob=1.0):
            player = game.playerOfIset[owner][iset]
            if player == owner:
                self.sumstgy[owner][iset] += prob * self.stgy[player][iset]
                for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                    if prob * self.stgy[player][iset][aid] > 1e-8:
                        updSumstgy(owner, nxtiset,
                                   prob * self.stgy[player][iset][aid])
            else:
                for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                    updSumstgy(owner, nxtiset, prob)

        updSumstgy(0, 0)
        updSumstgy(1, 0)
Code example #12
    def updateAll(self):
        game = self.game

        self.round += 1

        learningrate = 0.05 / (1.0 + 0.003 * np.sqrt(1.0 * self.round))
        temperature = (1.0 + 0.02 * np.sqrt(1.0 * self.round))

        def simulate(
                h,
                histtraj):  #simulate strategies and add data to replay memory
            player = game.playerOfHist[h]
            curstgy = None
            if player == 2:
                curstgy = game.chanceprob[h]
            else:
                iset = game.Hist2Iset[player][h]
                curstgy = self.stgy[player][iset]
            a = np.random.choice(game.nactsOnHist[h], p=curstgy)
            nxth = game.histSucc[h][a]
            if game.isTerminal[nxth]:
                r = game.simulate(nxth)
                histtraj.append((h, a, nxth, r[0]))
            else:
                histtraj.append((h, a, nxth, 0.0))
                simulate(nxth, histtraj)

        def translate_traj(owner, histtraj, isettraj):
            ids = []
            for i in range(len(histtraj)):
                h = histtraj[i][0]
                if game.playerOfHist[h] == owner:
                    ids.append(i)
            if len(ids) == 0:
                return []
            for i, inds in enumerate(ids):
                h = histtraj[inds][0]
                a = histtraj[inds][1]
                iset = game.Hist2Iset[owner][h]
                if i == len(ids) - 1:
                    niset = -1
                    rews = 0.0
                    for j in range(inds, len(histtraj)):
                        rews += histtraj[j][3]
                    if owner == 1:
                        rews *= -1
                    isettraj.append((iset, a, -1, rews))
                else:
                    nh = histtraj[ids[i + 1]][0]
                    niset = game.Hist2Iset[owner][nh]
                    isettraj.append((iset, a, niset, 0.0))

        for _fsdf in range(2):
            histtraj = []
            simulate(0, histtraj)
            isettrajs = [[], []]
            translate_traj(0, histtraj, isettrajs[0])
            translate_traj(1, histtraj, isettrajs[1])

            self.trajs[0].append(isettrajs[0])
            self.trajs[1].append(isettrajs[1])

        def updQtraj(owner, isettraj, lr):
            for iset, a, niset, rew in isettraj:
                if niset == -1:
                    self.Q[owner][iset][a] = (
                        1.0 - lr) * self.Q[owner][iset][a] + lr * rew
                else:
                    self.Q[owner][iset][a] = (1.0 - lr) * self.Q[owner][iset][
                        a] + lr * self.Q[owner][iset].max()

        for k in range(30):
            for p in range(2):
                trajid = np.random.randint(0, len(self.trajs[p]))
                updQtraj(p, self.trajs[p][trajid], learningrate)

        self.genStgy(temperature)

        def updSumstgy(owner, iset, prob=1.0):
            player = game.playerOfIset[owner][iset]
            if player == owner:
                self.sumstgy[owner][iset] += prob * self.stgy[player][iset]
                for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                    if prob * self.stgy[player][iset][aid] > 1e-8:
                        updSumstgy(owner, nxtiset,
                                   prob * self.stgy[player][iset][aid])
            else:
                for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                    updSumstgy(owner, nxtiset, prob)

        updSumstgy(0, 0)
        updSumstgy(1, 0)
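self.genStgy(temperature) is not shown on this page. Since the temperature schedule above grows with the round number, it plausibly acts as an inverse temperature in a softmax (Boltzmann) mapping from the learned Q-values to a strategy, so play gets greedier over time; the following is only a hypothetical sketch of that mapping, not the project's code.

# Hypothetical sketch (assumption, not genStgy itself): softmax strategy over
# Q-values, where a larger `temperature` value sharpens the distribution.
import numpy as np

def softmax_strategy(q_values, temperature):
    z = temperature * np.asarray(q_values, dtype=float)
    z -= z.max()                      # shift for numerical stability
    p = np.exp(z)
    return p / p.sum()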
Code example #13
    def updateAll(self):
        game = self.game

        self.round += 1

        transpsrl, rewpsrl = game.resample()
        transvalidation, rewvalidation = game.resample()

        def avgchance(h, curtrans, currew, w, sumtrans, sumrew, sumw):
            sumw[h] += w
            term = game.isTerminal[h]
            player = game.playerOfHist[h]
            if term == True:

                sumrew[h] += (w * currew[h][0], w * currew[h][1])
                return
            if player == 2:
                for a in range(game.nactsOnHist[h]):
                    avgchance(game.histSucc[h][a], curtrans, currew,
                              w * curtrans[h][a], sumtrans, sumrew, sumw)
                sumtrans[h] += w * curtrans[h]
            else:
                for a in range(game.nactsOnHist[h]):
                    avgchance(game.histSucc[h][a], curtrans, currew, w,
                              sumtrans, sumrew, sumw)

        avgchance(0, transpsrl, rewpsrl, 1.0, self.sampledtrans1,
                  self.sampledrews1, self.weight1)

        self.update(0, 0, [np.ones(1), np.ones(1)], [0], rewpsrl,
                    transpsrl)  #the CFR algorithm
        self.update(1, 0, [np.ones(1), np.ones(1)], [0], rewpsrl,
                    transpsrl)  #the CFR algorithm

        def updStgy(owner, iset):
            if self.isetflag[owner][iset] != self.round:
                return
            player = game.playerOfIset[owner][iset]
            if player == owner:
                self.stgy[owner][iset] = self.solvers[owner][
                    iset].curstgy.copy()
            for nxtiset in game.isetSucc[owner][iset]:
                updStgy(owner, nxtiset)

        updStgy(0, 0)
        updStgy(1, 0)

        def updSumstgy(owner, iset, prob=1.0):
            player = game.playerOfIset[owner][iset]
            if player == owner:
                self.sumstgy[owner][iset] += prob * self.stgy[player][iset]
                for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                    if prob * self.stgy[player][iset][aid] > 1e-8:
                        updSumstgy(owner, nxtiset,
                                   prob * self.stgy[player][iset][aid])
            else:
                for aid, nxtiset in enumerate(game.isetSucc[owner][iset]):
                    updSumstgy(owner, nxtiset, prob)

        updSumstgy(0, 0)
        updSumstgy(1, 0)

        avgstgy = self.avgstgyprofile()

        def getExploreStgy(owner, iset, explore_stgy, oppstgy, ds_c1, ds_c2):
            rew1, trans1, reachp1 = ds_c1
            rew2, trans2, reachp2 = ds_c2
            hists = game.Iset2Hists[owner][iset]
            if game.isTerminal[hists[0]] == True:
                return
            player = game.playerOfIset[owner][iset]
            if player == owner:

                nacts = game.nactsOnIset[owner][iset]

                outcome1 = np.zeros(nacts)
                outcome2 = np.zeros(nacts)
                for a in range(nacts):
                    getExploreStgy(owner, game.isetSucc[owner][iset][a],
                                   explore_stgy, oppstgy, ds_c1, ds_c2)

                    for h in hists:
                        outcome1[a] += reachp1[h] * rew1[game.histSucc[h][a]][owner]
                        outcome2[a] += reachp2[h] * rew2[game.histSucc[h][a]][owner]
                a_star = np.argmax(outcome1 - outcome2)
                _stgy = np.zeros(nacts)
                _stgy[a_star] = 1

                explore_stgy[iset] = _stgy

                for h in hists:
                    rew1[h] = rew1[game.histSucc[h][a_star]]
                    rew2[h] = rew2[game.histSucc[h][a_star]]

            else:
                truenacts = game.nactsOnHist[hists[0]]
                obsnacts = game.nactsOnIset[owner][iset]
                for h in hists:
                    _stgy1 = None
                    _stgy2 = None
                    if player == 2:
                        _stgy1 = trans1[h]
                        _stgy2 = trans2[h]
                    else:
                        piset = game.Hist2Iset[player][h]
                        _stgy1 = oppstgy[piset]
                        _stgy2 = oppstgy[piset]
                    nactsh = game.nactsOnHist[h]
                    for a in range(nactsh):
                        reachp1[game.histSucc[h][a]] = reachp1[h] * _stgy1[a]
                        reachp2[game.histSucc[h][a]] = reachp2[h] * _stgy2[a]

                for a in range(obsnacts):
                    getExploreStgy(owner, game.isetSucc[owner][iset][a],
                                   explore_stgy, oppstgy, ds_c1, ds_c2)
                for h in hists:
                    _stgy1 = None
                    _stgy2 = None
                    if player == 2:
                        _stgy1 = trans1[h]
                        _stgy2 = trans2[h]
                    else:
                        piset = game.Hist2Iset[player][h]
                        _stgy1 = oppstgy[piset]
                        _stgy2 = oppstgy[piset]
                    nactsh = game.nactsOnHist[h]
                    for a in range(nactsh):
                        rew1[h] += rew1[game.histSucc[h][a]] * _stgy1[a]
                        rew2[h] += rew2[game.histSucc[h][a]] * _stgy2[a]
            if iset == 0:
                pass
                #print("check", rew1[0] - rew2[0], rew1[0], rew2[0])

        #avgchrew = np.zeros(game.numHists).tolist()
        #def getavgchance(h, avgchrew, avgchtrans):

        #getavgchance(0, avgchrew, avgchtrans)
        prob1 = np.ones(game.numHists)
        prob2 = np.ones(game.numHists)
        explore_stgy = [[], []]

        for i, iset in enumerate(range(game.numIsets[0])):
            nact = game.nactsOnIset[0][iset]
            if game.playerOfIset[0][iset] == 0:
                explore_stgy[0].append(np.ones(nact) / nact)
            else:
                explore_stgy[0].append(np.ones(0))
        for i, iset in enumerate(range(game.numIsets[1])):
            nact = game.nactsOnIset[1][iset]
            if game.playerOfIset[1][iset] == 1:
                explore_stgy[1].append(np.ones(nact) / nact)
            else:
                explore_stgy[1].append(np.ones(0))

        avgchtrans = copy.deepcopy(self.sampledtrans1)
        avgchrew = copy.deepcopy(self.sampledrews1)

        for h in range(game.numHists):
            if game.isTerminal[h]:
                avgchrew[h] /= self.weight1[h]
            if game.playerOfHist[h] == 2:
                avgchtrans[h] /= self.weight1[h]

        if self.Type == "default":
            getExploreStgy(0, 0, explore_stgy[0], avgstgy[1],
                           (rewvalidation, transvalidation, prob2),
                           (avgchrew, avgchtrans, prob1))
            getExploreStgy(1, 0, explore_stgy[1], avgstgy[0],
                           (rewvalidation, transvalidation, prob2),
                           (avgchrew, avgchtrans, prob1))
            for t in range(1):
                simulate(game, 0, [explore_stgy[0], self.stgy[1]])
                simulate(game, 0, [self.stgy[0], explore_stgy[1]])

        if self.Type == "br_dirc":
            getExploreStgy(0, 0, explore_stgy[0], avgstgy[1],
                           (rewvalidation, transvalidation, prob2),
                           (avgchrew * 0.0, avgchtrans, prob1))
            getExploreStgy(1, 0, explore_stgy[1], avgstgy[0],
                           (rewvalidation, transvalidation, prob2),
                           (avgchrew * 0.0, avgchtrans, prob1))
            for t in range(1):
                simulate(game, 0, [explore_stgy[0], self.stgy[1]])
                simulate(game, 0, [self.stgy[0], explore_stgy[1]])

        if self.Type == "ordinary":
            simulate(game, 0, self.stgy)
            simulate(game, 0, self.stgy)
        if self.Type == "random":
            for i in range(2):
                for iset in range(game.numIsets[i]):
                    pl = game.playerOfIset[i][iset]
                    explore_stgy[i].append(0)
                    if pl == i:
                        nacts = game.nactsOnIset[i][iset]
                        explore_stgy[i][iset] = np.ones(nacts) / nacts
            simulate(game, 0, explore_stgy)
            simulate(game, 0, explore_stgy)