def test():
    """Build a 4x4 gridworld MDP with one agent and two auto-policy cats,
    verify the sizes of the derived spaces, run semi-MDP value iteration
    (T=4, deterministic), and pickle the resulting value function and policy.

    Side effects:
        Reads  .\Cost1\policy_sto.pkl   (pre-trained stochastic policy).
        Writes .\Cost1\V_semi_4.pkl and .\Cost1\policy_semi_4.pkl.

    Raises:
        AssertionError: if any computed space has an unexpected size.
    """
    # NOTE(review): the ".\Cost1\..." backslash paths are Windows-specific and
    # fragile (a segment starting with e.g. 't' would become a tab escape);
    # consider os.path.join — left unchanged here to preserve behavior.
    env = Environment.Env(4, 4, 0.15)
    obstacles = [(1, 1), (1, 3)]
    for obs in obstacles:
        env.addobstacle(obs)
    agent1 = Environment.agent(env)
    cat1 = Environment.agent(env, autopolicy=True)
    cat2 = Environment.agent(env, autopolicy=True)

    with open(".\Cost1\policy_sto.pkl", "rb") as f:
        policy = pickle.load(f)

    mdp = MDP()
    mdp.addplayer(agent1)
    mdp.addplayer(cat1)
    mdp.addplayer(cat2)

    # Test calculate gridworld: 14 free cells ** 3 players = 2744 joint positions.
    mdp.calgridworldsp()
    assert len(mdp.gridworldspace) == 2744, 'gridworldspace section is not correct'

    # Test calculate statespace: one policy per cat doubles the gridworld space.
    mdp.addpolicy(policy)
    mdp.addpolicy(policy)
    mdp.calstatespace()
    assert len(mdp.statespace) == 5488, 'statespace section is not correct'

    # Test calculate actionspace: grows with the planning horizon T.
    mdp.calactionspace()
    assert len(mdp.actionspace) == 2, 'actionspace 2 section is not correct'
    mdp.setT(4)
    mdp.calactionspace()
    assert len(mdp.actionspace) == 8, 'actionspace 8 section is not correct'

    mdp.termst = mdp.terminalSt()
    mdp.penalty = mdp.setpenalty()
    mdp.cal_Kstep_Matrix()

    V, policy = mdp.valueFunc_semi(stochastic=False)

    # 'with' guarantees the handles are closed even if pickling raises
    # (the original open()/close() pairs leaked on exception).
    with open(".\Cost1\V_semi_4.pkl", "wb") as file:
        pickle.dump(V, file)
    with open(".\Cost1\policy_semi_4.pkl", "wb") as file:
        pickle.dump(policy, file)
def test():
    """Run 1000 Monte-Carlo rollouts of one agent against two cats using the
    pre-trained policies, pickle the step counts and trajectories, and print
    the mean steps, mean sensor reward, and mean penalty.

    Side effects:
        Reads  .\Revision1\policy_sto.pkl and .\Revision1\policy_semi_2.pkl.
        Writes .\Revision1\steplist2.pkl and .\Revision1\trajlist2.pkl.
    """
    # NOTE(review): this is the second `def test()` in the file and shadows the
    # first at import time — only this one survives; consider distinct names.
    env = Environment.Env(4, 4, 0.15)
    obstacles = [(1, 1), (1, 3)]
    for obs in obstacles:
        env.addobstacle(obs)
    agent1 = Environment.agent(env)
    cat1 = Environment.agent(env, autopolicy=True)
    cat2 = Environment.agent(env, autopolicy=True)

    # Choose Policy
    with open(".\Revision1\policy_sto.pkl", "rb") as f:
        policy_act = pickle.load(f)
    with open(".\Revision1\policy_semi_2.pkl", "rb") as f:
        policy = pickle.load(f)

    agent_ini = (1, 0)
    cat1_ini = (3, 2)
    cat2_ini = (0, 3)
    state = (agent_ini, cat1_ini, cat2_ini)

    n_runs = 1000  # was a magic number repeated in every mean below
    steplist = []
    trajlist = []
    sensorrewardlist = []
    penaltylist = []
    for i in range(n_runs):
        stepcount, traj, sensorreward, penaltycount = simulate_3(
            agent1, cat1, cat2, state, policy, policy_act)
        steplist.append(stepcount)
        # BUG FIX: original was trajlist.append(trajlist), which appended the
        # list to itself — the pickled file held no trajectories at all.
        trajlist.append(traj)
        sensorrewardlist.append(sensorreward)
        penaltylist.append(penaltycount)

    # 'with' guarantees the handles are closed even if pickling raises.
    # NOTE: the "\\t" escape in trajlist2's path is required — a bare "\t"
    # would be a tab character.
    with open(".\Revision1\steplist2.pkl", "wb") as file:
        pickle.dump(steplist, file)
    with open(".\Revision1\\trajlist2.pkl", "wb") as file:
        pickle.dump(trajlist, file)

    print(sum(steplist) / n_runs)
    print(sum(sensorrewardlist) / n_runs)
    print(sum(penaltylist) / n_runs)