def run(agent: DDPG, steps: int, save_dir: str) -> float:
    """Run a DDPG agent on the caterpillar simulator for `steps` steps,
    logging per-step telemetry to CSV files under `save_dir`, and return
    the total locomotion distance covered."""
    sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"), ("step", "distance"))
    sim_somite_phase_diffs_file = DataCSVSaver(os.path.join(save_dir, "somite_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_phase_diffs_file = DataCSVSaver(os.path.join(save_dir, "gripper_phase_diffs.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_phases_file = DataCSVSaver(os.path.join(save_dir, "somite_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_phases_file = DataCSVSaver(os.path.join(save_dir, "gripper_phases.txt"), ["step"] + ["phi_{}".format(i) for i in range(config.grippers)])
    sim_somite_actions_file = DataCSVSaver(os.path.join(save_dir, "somite_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_gripper_actions_file = DataCSVSaver(os.path.join(save_dir, "gripper_actions.txt"), ["step"] + ["action_{}".format(i) for i in range(config.grippers)])
    sim_frictions_file = DataCSVSaver(os.path.join(save_dir, "frictions.txt"), ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = DataCSVSaver(os.path.join(save_dir, "tensions.txt"), ["step"] + ["tension_{}".format(i) for i in range(config.somites - 2)])

    caterpillar = Caterpillar(config.somites, config.oscillators_list, config.grippers_list, config.caterpillar_params)
    locomotion_distance = utils.locomotion_distance_logger(caterpillar)
    for step in range(steps):
        obs, somite_phases, gripper_phases = observe(caterpillar)
        actions = agent.act(obs)
        # The action vector packs somite feedbacks first, then gripper feedbacks.
        feedbacks_somite, feedbacks_gripper = actions[:config.oscillators], actions[config.oscillators:]
        caterpillar.step_with_feedbacks(config.params["time_delta"], tuple(feedbacks_somite), tuple(feedbacks_gripper))

        # Split the flat observation: frictions (somites values), tensions
        # (somites - 2 values), then phase components, which are not logged here.
        frictions, tensions, _, _ = np.split(
            obs, [config.somites, config.somites * 2 - 2, config.somites * 2 - 2 + config.oscillators])
        sim_distance_file.append_data(step, locomotion_distance(caterpillar))
        sim_somite_phase_diffs_file.append_data(step, *utils.phase_diffs(np.array(somite_phases)))
        sim_gripper_phase_diffs_file.append_data(step, *utils.phase_diffs(np.array(gripper_phases)))
        sim_somite_phases_file.append_data(step, *utils.mod2pi(np.array(somite_phases)))
        sim_gripper_phases_file.append_data(step, *utils.mod2pi(np.array(gripper_phases)))
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)
        sim_somite_actions_file.append_data(step, *feedbacks_somite)
        sim_gripper_actions_file.append_data(step, *feedbacks_gripper)

    caterpillar.save_simulation(os.path.join(save_dir, "render.json"))
    return locomotion_distance(caterpillar)
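
The np.split call in run() relies on a fixed layout of the flat observation vector: somites friction values first, then somites - 2 tension values, then the oscillator phase components, which run() discards. A minimal sketch of that slicing, with hypothetical sizes standing in for the config values:

import numpy as np

somites, oscillators = 5, 4  # hypothetical stand-ins for config values
# 3 extra trailing components fill the final split segment (hypothetical).
obs = np.arange(somites + (somites - 2) + oscillators + 3, dtype=float)

# Same split indices as in run().
frictions, tensions, phase_part, rest = np.split(
    obs, [somites, somites * 2 - 2, somites * 2 - 2 + oscillators])
assert len(frictions) == somites
assert len(tensions) == somites - 2
assert len(phase_part) == oscillators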
Example #2
# NOTE: the head of this call was cut off in the original snippet. `model`,
# `actor_opt`, and `critic_opt` are hypothetical names for the policy/Q model
# and its optimizers, which must be defined earlier; the keyword arguments
# below match ChainerRL's DDPG constructor.
agent = DDPG(
    model, actor_opt, critic_opt, rbuf,
    gamma=args.gamma,
    explorer=explorer,
    replay_start_size=5000,
    target_update_method='soft',
    target_update_interval=1,
    update_interval=4,
    soft_update_tau=1e-2,
    n_times_update=1,
    phi=phi,
    minibatch_size=128,  # args.minibatch_size
)
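
With target_update_method='soft' and target_update_interval=1, the target networks track the online networks by Polyak averaging on every update, with rate soft_update_tau=1e-2. A minimal NumPy sketch of that update rule (an illustration of the math, not ChainerRL's internal API):

import numpy as np

def soft_update(target_params, online_params, tau=1e-2):
    # target <- tau * online + (1 - tau) * target, applied per parameter
    for name in target_params:
        target_params[name] = (
            tau * online_params[name] + (1.0 - tau) * target_params[name])
    return target_params

target = {'w': np.zeros(3)}
online = {'w': np.ones(3)}
target = soft_update(target, online)  # 'w' moves 1% of the way toward online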

# Load the saved model
agent.load("DDPG_last_model_rew8k")

for i in range(args.test_episodes):
    obs = env.reset()
    done = False
    reward_Alan = 0
    t = 0
    while not done and t < 200:  # cap each test episode at 200 steps
        # env.render()
        # agent.load("DDPG_best_model")
        action = agent.act(obs)  # greedy action from the loaded policy
        obs, r, done, _ = env.step(action)  # the info dict is unused
        reward_Alan += r
        t += 1
    print('Test episode:', i, 'Reward obtained:', reward_Alan)
    # agent.stop_episode()
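
One caveat for this loop: ChainerRL agents that keep per-episode state (a recurrent model, or stateful exploration noise such as an Ornstein-Uhlenbeck process) expect agent.stop_episode() at the end of each evaluation episode, and that call is commented out above. A minimal sketch with the reset in place; run_episode is a hypothetical helper, not part of ChainerRL:

def run_episode(env, agent, max_steps=200):
    """Roll out one evaluation episode and return its undiscounted return."""
    obs = env.reset()
    total, done, t = 0.0, False, 0
    while not done and t < max_steps:
        obs, r, done, _ = env.step(agent.act(obs))
        total += r
        t += 1
    agent.stop_episode()  # reset episodic state (recurrence, noise process)
    return total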