def train_caterpillar(save_dir: utils.SaveDir, actor_module_name: str):
    # Dump train parameters
    config.print_config()

    actor_module = import_module(actor_module_name)
    actor_class = getattr(actor_module, config.COMMON_ACTOR_NAME)
    rlm = PEPGActorManager(actor_class)

    caterpillar = Caterpillar(config.somites, mode="default")
    config.dump_config(save_dir.log_dir())

    distance_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "distance.txt"), ("episode", "distance"))
    reward_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "reward.txt"), ("episode", "reward"))
    sigma_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "sigma.txt"), ("episode", "average sigma"))

    episode = 0
    while episode < config.params["episodes"]:
        params_sets = rlm.sample_params()

        print("\nEpisode: {}".format(episode))
        print("---------------------------------------------------------------------")
        rewards = []

        try:
            with Pool(processes=config.exec_params["worker_processes"], initializer=mute) as pool:
                rewards = pool.map(run_simulation, [(config.params["steps"], actor_module_name, p_set, []) for p_set in params_sets])

            rlm.update_params(np.array(rewards))
            episode += 1

            # Try parameters after this episode --------------------------------------
            rlm.set_params(rlm.parameters)
            caterpillar.reset()

            (accumulated_tension,) = exec_steps(config.params["steps"], rlm.get_actor(), caterpillar, [], episode=episode - 1)
            reward = caterpillar.moved_distance() - accumulated_tension / config.params["tension_divisor"]

            # Save parameter performance
            distance_log.append_data(episode, caterpillar.moved_distance())
            sigma_log.append_data(episode, np.mean(rlm.sigmas))
            reward_log.append_data(episode, reward)

            announce = "  --- Distance: {}   Reward: {}".format(caterpillar.moved_distance(), reward)
            print(announce)

        except KeyboardInterrupt:
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                rlm.set_params(rlm.parameters)
                test_current_params(rlm.get_actor(), save_dir.log_dir(), episode)
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break

    rlm.set_params(rlm.parameters)
    rlm.save(save_file_path=os.path.join(save_dir.model_dir(), 'actor_model.pickle'))
    return rlm.get_actor()
Exemple #2
0
def run_caterpillar(actor,
                    save_dir: str,
                    steps: int,
                    disable_list=None,
                    broken_value=0):
    if disable_list is None:
        disable_list = []

    caterpillar = Caterpillar(config.somites, mode="default")

    # Record during sampling
    sim_distance_file = DataCSVSaver(os.path.join(save_dir, "distance.txt"),
                                     ("step", "distance"))
    sim_phase_diffs_file = DataCSVSaver(
        os.path.join(save_dir, "phase_diffs.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_phases_file = DataCSVSaver(
        os.path.join(save_dir, "phases.txt"),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)])
    sim_actions_file = DataCSVSaver(
        os.path.join(save_dir, "actions.txt"),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    sim_frictions_file = DataCSVSaver(
        os.path.join(save_dir, "frictions.txt"),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    sim_tension_file = DataCSVSaver(
        os.path.join(save_dir, "tensions.txt"),
        ["step"] + ["tension_{}".format(i) for i in range(config.oscillators)])

    for step in range(steps):
        obv, action = observe_and_act(actor,
                                      caterpillar,
                                      disable_list=disable_list,
                                      broken_value=broken_value)
        _, frictions, tensions = obv
        caterpillar.step(step, int(1 / config.params["time_dalte"] / 10))

        # Save data
        sim_distance_file.append_data(step, caterpillar.moved_distance())
        sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base())
        sim_phases_file.append_data(step, *caterpillar.phis)
        sim_actions_file.append_data(step, *action)
        sim_frictions_file.append_data(step, *frictions)
        sim_tension_file.append_data(step, *tensions)

    caterpillar.save_simulation("{}/render.sim".format(save_dir))

    return caterpillar.moved_distance()
def run_simulation(sim_vals) -> float:
    """
    Run caterpillar with a policy given in argument.

    This function is for multiprocessing.
    sim_vals: (
        steps: int,
        actor_module_name: str,
        actor_params: [np.array(params_0), np.array(params_1), ...],
        disable_list: list
    )
    """
    steps, actor_module_name, actor_params, disable_list = sim_vals
    assert isinstance(steps, int)
    assert isinstance(actor_params, Iterable)
    assert isinstance(disable_list, Iterable)

    # Init actor
    actor_module = import_module(actor_module_name)
    actor = getattr(actor_module, config.COMMON_ACTOR_NAME)()
    actor.set_params(actor_params)

    # Init caterpillar
    caterpillar = Caterpillar(config.somites, mode="default")

    # Run steps
    (accumulated_tension,) = exec_steps(steps, actor, caterpillar, disable_list=disable_list)

    reward = caterpillar.moved_distance() - accumulated_tension / config.params["tension_divisor"]
    return reward
def test_current_params(actor: base_actor, log_dir: str, episode: int):
    steps = input("How many steps for this sample?: ")
    if steps == "":
        utils.notice("default steps {}".format(config.params["default_sample_steps"]))
        steps = config.params["default_sample_steps"]

    # Record during sampling
    sim_distance_file = DataCSVSaver(
        "{}/train_result_ep{}_distance.txt".format(log_dir, episode),
        ("step", "distance")
    )
    sim_phase_diffs_file = DataCSVSaver(
        "{}/train_result_ep{}_phase_diffs.txt".format(log_dir, episode),
        ["step"] + ["phi_{}".format(i) for i in range(config.oscillators)]
    )
    sim_actions_file = DataCSVSaver(
        "{}/train_result_ep{}_actions.txt".format(log_dir, episode),
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)]
    )
    sim_frictions_file = DataCSVSaver(
        "{}/train_result_ep{}_frictions.txt".format(log_dir, episode),
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)]
    )

    caterpillar = Caterpillar(config.somites, mode="default")
    for step in range(int(steps)):
        try:
            _, action = caterpillar_runner.observe_and_act(actor, caterpillar, [])
            caterpillar.step(step, int(1 / config.params["time_dalte"] / 10))
        except Exception:
            continue
        else:
            # Save data
            sim_distance_file.append_data(step, caterpillar.moved_distance())
            sim_phase_diffs_file.append_data(step, *caterpillar.phases_from_base())
            sim_actions_file.append_data(step, *action)
            frictions = caterpillar.frictions()
            sim_frictions_file.append_data(step, *frictions)

    print("Moved distance:", caterpillar.moved_distance())
    caterpillar.save_simulation("{}/train_result_ep{}.sim".format(log_dir, episode))