import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Project-local names assumed to be importable in this module: CTRNNAgent,
# get_initial, Config, logger, OmegaConf, _absolutize_paths, RaycastEnv,
# CTRNNEnv, EvolutionarySearch.


def test(env):
    """Roll out the initial agent for a few episodes, printing observations."""
    total_score = 0
    episode_num = 10
    max_step = 500
    agent = CTRNNAgent()
    agent.set_weights(get_initial(0))
    for i in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        for j in range(max_step):  # inner index renamed so it no longer shadows the episode index
            action = agent.Act(obs)
            print("Observation", obs)
            if np.isnan(action).any():
                return 10  # sentinel penalty for numerically unstable genomes
            obs, reward, done, info = env.step(action)
            if done:
                total_score += reward
                break
    return total_score

def train(config: Config):
    logger.info("Configuration\n" + OmegaConf.to_yaml(config))
    config = _absolutize_paths(config)
    pop_size = 100
    its = 70
    sensor_count = 5
    inter_count = 3
    motor_count = 2
    tmp = CTRNNAgent(motor_count=motor_count,
                     sensor_count=sensor_count,
                     inter_count=inter_count)
    lb, ub = tmp.get_range()
    genome_size = tmp.param_count()
    env = RaycastEnv(CTRNNEnv(**config.env, use_visual=False))
    # NOTE: debugging shortcut -- while this early return is in place, the
    # evolutionary search below is never reached.
    return test(env)
    algo = EvolutionarySearch(env, pop_size=pop_size, genotype_size=genome_size,
                              lb=lb, ub=ub, max_iter=its)
    best = algo.run()
    logger.info("Best is:")
    logger.info(best)

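# Usage sketch (assumption, not part of the original module): train() expects an
# OmegaConf-backed Config with an `env` section, so a minimal entry point could
# load one from YAML. The file name "config.yaml" is hypothetical.
if __name__ == "__main__":
    from omegaconf import OmegaConf
    cfg = OmegaConf.load("config.yaml")  # hypothetical path; must provide cfg.env
    train(cfg)
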
# NOTE: alternative version of test() that samples every 5th observation and
# dumps the first 10000 samples to ./labels.csv; if kept in the same module,
# this definition shadows the one above.
def test(env):
    total_score = 0
    episode_num = 10000
    max_step = 500
    agent = CTRNNAgent()
    agent.set_weights(get_initial(0))
    counter = 0
    total = 10000  # number of observation samples still to collect
    labels = []
    for i in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        for j in range(max_step):  # inner index renamed so it no longer shadows the episode index
            action = agent.Act(obs)
            counter += 1
            if counter % 5 == 0:  # keep every 5th observation
                labels.append(obs)
                total -= 1
                if total == 0:
                    df = pd.DataFrame(labels)
                    df.to_csv("./labels.csv", index_label="index")
                    return labels
            if np.isnan(action).any():
                return 10  # sentinel penalty for numerically unstable genomes
            obs, reward, done, info = env.step(action)
            if done:
                total_score += reward
                break
    return labels  # partial collection if the episode budget ran out first

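# Companion sketch (assumption, not in the original code): reload the samples
# that the collection variant of test() writes to ./labels.csv. read_csv with
# index_col="index" mirrors the to_csv(index_label="index") call above.
def load_labels(path="./labels.csv"):
    return pd.read_csv(path, index_col="index")
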
def EvaluateGenome(genome):
    """Fitness function: negated total reward over a few episodes (lower is better)."""
    episode_num = 6
    max_step = 500
    global E  # environment is shared through a module-level global
    env = E
    agent = CTRNNAgent()
    agent.set_weights(genome)
    total_score = 0
    for i in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        for j in range(max_step):
            action = agent.Act(obs)
            if np.isnan(action).any():
                return 10  # sentinel penalty for numerically unstable genomes
            obs, reward, done, info = env.step(action)
            if done:
                total_score += reward
                break
    return -total_score

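# Usage sketch (assumption): EvaluateGenome() reads its environment from the
# module-level global E, so a caller must bind E before any evaluation. The
# wrapper below is hypothetical and only illustrates scoring one genome by hand.
def evaluate_one(genome, env):
    global E
    E = env
    return EvaluateGenome(genome)  # negated episode reward; lower is better
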
# NOTE: alternative version of test() that records and plots the CTRNN neuron
# trajectory of each episode; shadows the definitions above if kept together.
def test(env):
    total_score = 0
    episode_num = 10
    max_step = 500
    agent = CTRNNAgent()
    agent.set_weights(get_initial(0))
    print(agent)
    for i in range(episode_num):
        obs = env.reset()
        agent.brain.randomize_outputs(0.5, 0.51)
        episode_traj = []
        for j in range(max_step):
            action, traj = agent.Act(obs, return_trajectory=True)
            episode_traj += traj
            if np.isnan(action).any():
                return 10  # sentinel penalty for numerically unstable genomes
            obs, reward, done, info = env.step(action)
            if done:
                traj = np.array(episode_traj)
                traj = np.nan_to_num(traj)  # replace NaNs before plotting
                t = pd.DataFrame(traj, columns=["I1", "I2", "I3", "M1", "M2"])
                t.plot()
                plt.show()
                total_score += reward
                break
    return total_score