Code example #1
import pickle

import Environment
from MDP import MDP  # assumed: the MDP class lives in a module named MDP


def test():
    env = Environment.Env(4, 4, 0.15)
    obstacles = [(1, 1), (1, 3)]
    for obs in obstacles:
        env.addobstacle(obs)
    agent1 = Environment.agent(env)
    cat1 = Environment.agent(env, autopolicy=True)
    cat2 = Environment.agent(env, autopolicy=True)
    with open(r".\Cost1\policy_sto.pkl", "rb") as f:
        policy = pickle.load(f)
    mdp = MDP()

    mdp.addplayer(agent1)
    mdp.addplayer(cat1)
    mdp.addplayer(cat2)
    # Test calculate gridworld
    mdp.calgridworldsp()
    # 16 cells minus 2 obstacles leaves 14 free cells; 14**3 = 2744 joint positions.
    assert len(mdp.gridworldspace) == 2744, 'gridworldspace size is incorrect'

    # Test calculate statespace
    # The same stochastic policy is attached once per cat.
    mdp.addpolicy(policy)
    mdp.addpolicy(policy)
    mdp.calstatespace()
    # 2744 joint positions x 2 = 5488 states.
    assert len(mdp.statespace) == 5488, 'statespace size is incorrect'

    # Test calculate actionspace
    mdp.calactionspace()
    assert len(mdp.actionspace) == 2, 'actionspace size is not 2'

    # Raising the horizon T to 4 expands the action space to 2 * 4 = 8.
    mdp.setT(4)
    mdp.calactionspace()
    assert len(mdp.actionspace) == 8, 'actionspace size is not 8'
    mdp.termst = mdp.terminalSt()
    mdp.penalty = mdp.setpenalty()
    mdp.cal_Kstep_Matrix()
    V, policy = mdp.valueFunc_semi(stochastic=False)
    with open(r".\Cost1\V_semi_4.pkl", "wb") as f:
        pickle.dump(V, f)
    with open(r".\Cost1\policy_semi_4.pkl", "wb") as f:
        pickle.dump(policy, f)
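
A quick sanity check on the artifacts this test writes is to reload them. A minimal sketch, assuming V and policy unpickle to dict-like mappings from state keys to values and actions (the exact key layout depends on the MDP implementation):

import pickle

with open(r".\Cost1\V_semi_4.pkl", "rb") as f:
    V = pickle.load(f)
with open(r".\Cost1\policy_semi_4.pkl", "rb") as f:
    policy = pickle.load(f)

print(len(V), len(policy))        # number of states each mapping covers
some_state = next(iter(V))        # an arbitrary state key
print(some_state, V[some_state])  # value computed for that state
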
Code example #2
import pickle

import Environment

# simulate_2 and simulate_3 are assumed to be defined in, or imported into, this module.


def test():
    env = Environment.Env(4, 4, 0.15)
    obstacles = [(1, 1), (1, 3)]
    for obs in obstacles:
        env.addobstacle(obs)
    agent1 = Environment.agent(env)
    cat1 = Environment.agent(env, autopolicy=True)
    cat2 = Environment.agent(env, autopolicy=True)
    # Load the stochastic policy (used for the cats) and the semi-Markov policy (used for the agent).
    with open(r".\Revision1\policy_sto.pkl", "rb") as f:
        policy_act = pickle.load(f)

    with open(r".\Revision1\policy_semi_2.pkl", "rb") as f:
        policy = pickle.load(f)

    # Initial positions of the agent and the two cats.
    agent_ini = (1, 0)
    cat1_ini = (3, 2)
    cat2_ini = (0, 3)
    state = (agent_ini, cat1_ini, cat2_ini)
    steplist = []
    trajlist = []
    sensorrewardlist = []
    penaltylist = []
    # Alternative single-cat run, kept for reference:
    # for i in range(1000):
    #     stepcount, traj, penaltycount = simulate_2(agent1, cat1, agent_ini, cat1_ini, policy_act)
    #     steplist.append(stepcount)
    #     trajlist.append(traj)
    #     penaltylist.append(penaltycount)
    for i in range(1000):
        stepcount, traj, sensorreward, penaltycount = simulate_3(
            agent1, cat1, cat2, state, policy, policy_act)
        steplist.append(stepcount)
        trajlist.append(traj)
        sensorrewardlist.append(sensorreward)
        penaltylist.append(penaltycount)
    with open(r".\Revision1\steplist2.pkl", "wb") as f:
        pickle.dump(steplist, f)
    with open(r".\Revision1\trajlist2.pkl", "wb") as f:
        pickle.dump(trajlist, f)
    # Average steps, sensor reward, and penalties over the 1000 episodes.
    print(sum(steplist) / len(steplist))
    print(sum(sensorrewardlist) / len(sensorrewardlist))
    print(sum(penaltylist) / len(penaltylist))
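
The printed averages hide the spread across episodes. A minimal follow-up sketch, standard library only, that reloads steplist2.pkl and summarizes the distribution of episode lengths:

import pickle
import statistics

with open(r".\Revision1\steplist2.pkl", "rb") as f:
    steplist = pickle.load(f)

print("episodes:", len(steplist))
print("mean steps:", statistics.mean(steplist))
print("stdev steps:", statistics.stdev(steplist))
print("quartiles:", statistics.quantiles(steplist, n=4))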