import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# CTRNNAgent and get_initial are assumed to be defined elsewhere in this project.


def test(env):  # TestAgent(env)
    """Roll out the agent for a few episodes, printing each observation."""
    total_score = 0
    episode_num = 10
    max_step = 500
    agent = CTRNNAgent()
    agent.set_weights(get_initial(0))
    for episode in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        for step in range(max_step):
            action = agent.Act(obs)
            print("Observation", obs)
            if np.isnan(action).any():
                # Abort with a penalty score if the network output diverged.
                return 10
            obs, reward, done, info = env.step(action)
            if done:
                total_score += reward
                break
    return total_score
def test(env):  # TestAgent(env)
    """Variant of test() that logs every 5th observation and saves 10000 samples to labels.csv."""
    total_score = 0
    episode_num = 10000
    max_step = 500
    agent = CTRNNAgent()
    agent.set_weights(get_initial(0))
    counter = 0
    total = 10000
    labels = []
    for episode in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        for step in range(max_step):
            action = agent.Act(obs)
            counter += 1
            if counter % 5 == 0:
                labels.append(obs)
                total -= 1
                if total == 0:
                    # Enough samples collected: dump them and stop.
                    df = pd.DataFrame(labels)
                    df.to_csv("./labels.csv", index_label="index")
                    return labels
            if np.isnan(action).any():
                # Abort with a penalty score if the network output diverged.
                return 10
            obs, reward, done, info = env.step(action)
            if done:
                total_score += reward
                break
    return labels
def EvaluateGenome(genome):
    """Fitness function: total reward over 6 episodes, negated so lower is better."""
    episode_num = 6
    max_step = 500
    global E
    env = E
    agent = CTRNNAgent()
    agent.set_weights(genome)
    total_score = 0
    for episode in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        for step in range(max_step):
            action = agent.Act(obs)
            if np.isnan(action).any():
                # Penalize genomes whose network output diverges to NaN.
                return 10
            obs, reward, done, info = env.step(action)
            if done:
                total_score += reward
                break
    # Negate so that minimizing this value maximizes the accumulated reward.
    return -total_score
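
# Hedged usage sketch (assumption, not part of the original code): because
# EvaluateGenome() returns the negated episode reward, any black-box minimizer
# can drive the weight search. SciPy's differential evolution is used here
# purely as an illustration; the genome length and weight bounds are guesses.
def optimize_genome_sketch():
    from scipy.optimize import differential_evolution

    genome_length = len(get_initial(0))      # assumed: get_initial(0) yields a full genome
    bounds = [(-5.0, 5.0)] * genome_length   # assumed search range for each weight
    result = differential_evolution(EvaluateGenome, bounds, maxiter=20, polish=False)
    return result.x, result.fun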
def test(env):  # TestAgent(env)
    """Variant of test() that records the neuron outputs and plots them after each episode."""
    total_score = 0
    episode_num = 10
    max_step = 500
    agent = CTRNNAgent()
    agent.set_weights(get_initial(0))
    print(agent)
    for episode in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        episode_traj = []
        for step in range(max_step):
            action, traj = agent.Act(obs, return_trajectory=True)
            episode_traj += traj
            if np.isnan(action).any():
                # Abort with a penalty score if the network output diverged.
                return 10
            obs, reward, done, info = env.step(action)
            if done:
                # Plot the recorded interneuron (I1-I3) and motor neuron (M1, M2) outputs.
                traj = np.array(episode_traj)
                np.nan_to_num(traj, copy=False)
                t = pd.DataFrame(traj, columns=["I1", "I2", "I3", "M1", "M2"])
                t.plot()
                plt.show()
                total_score += reward
                break
    return total_score
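
# Hedged driver sketch (assumption): the 4-tuple returned by env.step() matches
# the classic OpenAI Gym API, so the functions above can be exercised like this.
# The environment id is a placeholder, not taken from the original code.
def run_sketch(env_id="LunarLanderContinuous-v2"):
    import gym

    global E
    E = gym.make(env_id)   # EvaluateGenome() reads the environment from the global E
    return test(E)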