def run_simulation(sim_vals) -> float:
    """Run one earthworm simulation under a given policy and return its reward.

    Designed as a multiprocessing worker: the single picklable tuple argument
    carries everything the worker needs.

    sim_vals: (
        steps: int,
        actor_module_name: str,
        actor_params: [np.array(params_0), np.array(params_1), ...],
        disable_list: list,
    )

    Returns:
        float: locomotion distance achieved over the run.
    """
    steps, actor_module_name, actor_params, disable_list = sim_vals
    assert isinstance(steps, int)
    assert isinstance(disable_list, Iterable)

    # Rebuild the actor inside the worker process from its module name.
    actor_cls = getattr(import_module(actor_module_name), config.COMMON_ACTOR_NAME)
    policy = actor_cls()
    policy.set_params(actor_params)

    # Fresh earthworm per simulation; the logger closure remembers its start position.
    worm = Earthworm(config.somites, config.oscillators_list, config.earthworm_params)
    distance_of = utils.locomotion_distance_logger(worm)

    exec_steps(steps, policy, worm, disable_list=disable_list)
    return distance_of(worm)
def observe_and_act(actor: base_actor.BaseActor, earthworm: Earthworm, disable_list: list, broken_value=0, episode=None):
    """Build the actor's observation from the earthworm state and query an action.

    Friction sensors listed in disable_list are simulated as broken: their
    readings are masked out and replaced with broken_value.

    Args:
        actor: policy providing get_action(state).
        disable_list: indices of somites whose friction sensor is disabled;
            None is treated as "no disabled sensors".
        broken_value: value a broken friction sensor reports.
        episode: unused here; kept for interface compatibility with callers.

    Returns:
        ((phases, frictions, tensions), action) — raw observation arrays and
        the action chosen by the actor.
    """
    # FIX: normalize None BEFORE validating. Previously the assert ran first,
    # so disable_list=None raised a TypeError inside np.array(None) < somites
    # instead of being handled by the None guard below.
    if disable_list is None:
        disable_list = []
    assert np.all(np.array(disable_list) < config.somites)

    # mask zeroes out broken sensors; bias injects the broken reading.
    mask = np.ones(config.somites)
    mask[disable_list] = 0
    bias = np.zeros(config.somites)
    bias[disable_list] = broken_value

    frictions = earthworm.frictions_x()
    tensions = earthworm.tensions()
    phases = earthworm.somite_phases()

    state = np.concatenate([frictions * mask + bias, phases, tensions])
    action = actor.get_action(state)
    observation = (np.array(phases), np.array(frictions), np.array(tensions))
    return observation, action
def exec_steps(steps: int, actor: base_actor.BaseActor, earthworm: Earthworm, disable_list: list, episode=None) -> float:
    """Drive the earthworm for the designated number of steps.

    Each step observes the current state, asks the actor for an action, and
    advances the physics with that action as oscillator feedback.

    Returns:
        tuple: (accumulated sum of squared tensions over all steps,)
    """
    tension_total = 0
    for _ in range(steps):
        observation, action = earthworm_runner.observe_and_act(
            actor, earthworm, disable_list, episode=episode)
        tensions = observation[2]
        tension_total += np.sum(np.power(tensions, 2))
        earthworm.step_with_feedbacks(config.params["time_delta"], tuple(action))
    return (tension_total, )
def run_earthworm(actor, save_dir: str, steps: int, disable_list=None, broken_value=0):
    """Run a (trained) actor on a fresh earthworm, logging per-step telemetry.

    Writes distance, phase, action, friction and tension CSV logs plus a
    render.json under save_dir, then returns the total locomotion distance.
    """
    if disable_list is None:
        disable_list = []

    worm = Earthworm(config.somites, config.oscillators_list, config.earthworm_params)

    def _csv(filename, columns):
        # One CSV log file under save_dir.
        return DataCSVSaver(os.path.join(save_dir, filename), columns)

    phase_cols = ["phi_{}".format(i) for i in range(config.oscillators)]
    distance_file = _csv("distance.txt", ("step", "distance"))
    phase_diffs_file = _csv("somite_phase_diffs.txt", ["step"] + phase_cols)
    phases_file = _csv("somite_phases.txt", ["step"] + phase_cols)
    actions_file = _csv(
        "somite_actions.txt",
        ["step"] + ["action_{}".format(i) for i in range(config.oscillators)])
    frictions_file = _csv(
        "frictions.txt",
        ["step"] + ["friction_{}".format(i) for i in range(config.somites)])
    tensions_file = _csv(
        "tensions.txt",
        ["step"] + ["tension_{}".format(i) for i in range(config.somites - 1)])

    # Closure remembering the starting position, so calls yield distance so far.
    distance_of = utils.locomotion_distance_logger(worm)

    for step in range(steps):
        observation, action = observe_and_act(
            actor, worm, disable_list=disable_list, broken_value=broken_value)
        worm.step_with_feedbacks(config.params["time_delta"], tuple(action))

        # Log the observation taken BEFORE this physics step.
        phases, frictions, tensions = observation
        distance_file.append_data(step, distance_of(worm))
        phase_diffs_file.append_data(step, *utils.phase_diffs(phases[:config.oscillators]))
        phases_file.append_data(step, *utils.mod2pi(phases[:config.oscillators]))
        frictions_file.append_data(step, *frictions)
        tensions_file.append_data(step, *tensions)
        actions_file.append_data(step, *action)

    worm.save_simulation("{}/render.json".format(save_dir))
    return distance_of(worm)
def new_earthworm() -> Earthworm:
    """Build a fresh Earthworm from the global config."""
    worm = Earthworm(config.somites, config.oscillators_list, config.earthworm_params)
    return worm
def train_worm(save_dir: utils.SaveDir, actor_module_name: str):
    """Train an actor with PEPG by running batched earthworm simulations.

    Each episode samples symmetric parameter perturbations (mu +/- epsilon),
    evaluates them in parallel worker processes via run_simulation, and
    updates the PEPG distribution from the paired rewards. Logs distance,
    reward and sigma per episode under save_dir; saves the final model.

    Args:
        save_dir: provides log_dir()/model_dir() paths for outputs.
        actor_module_name: importable module exposing the common actor class.

    Returns:
        The trained actor with its final parameters set.
    """
    actor_module = import_module(actor_module_name)
    actor_class = getattr(actor_module, config.COMMON_ACTOR_NAME)
    actor = actor_class()

    # Dump train parameters
    config.print_config()
    config.dump_config(save_dir.log_dir(), actor.dump_config())

    # Seed PEPG's mean vector with the actor's current (flattened) parameters.
    pepg = PEPG(actor.params_num())
    pepg.set_parameters(
        np.fromiter(
            itertools.chain.from_iterable(
                [p.flatten().tolist() for p in actor.current_params()]),
            np.float64))

    # Per-episode training logs.
    distance_log = DataCSVSaver(
        os.path.join(save_dir.log_dir(), "distance.txt"),
        ("episode", "distance"))
    reward_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "reward.txt"),
                              ("episode", "reward"))
    sigma_log = DataCSVSaver(os.path.join(save_dir.log_dir(), "sigma.txt"),
                             ("episode", "average sigma"))

    episode = 0
    while episode < config.params["episodes"]:
        current_mus = pepg.get_parameters()
        epsilons = pepg.sample_epsilons(config.params["batch_size"])
        # Symmetric sampling: first half is mu + eps, second half mu - eps;
        # transposed so each row is one full parameter set.
        params_sets = np.concatenate([
            current_mus[:, np.newaxis] + epsilons,
            current_mus[:, np.newaxis] - epsilons
        ], axis=1).T

        print("\nEpisode: {}".format(episode))
        print(
            "---------------------------------------------------------------------"
        )

        rewards = []
        try:
            # Evaluate every sampled parameter set in parallel worker processes.
            with Pool(processes=config.exec_params["worker_processes"],
                      initializer=mute) as pool:
                rewards = pool.map(
                    run_simulation,
                    [(config.params["steps"], actor_module_name, p_set, [])
                     for p_set in params_sets])

            # Split rewards back into the (+eps, -eps) halves.
            sample_num = epsilons.shape[1]
            r_plus = np.array(rewards[:sample_num])
            r_minus = np.array(rewards[sample_num:])
            # Drop any pair where either half diverged to NaN.
            nan_samples = np.where(np.isnan(r_plus) + np.isnan(r_minus))[0]
            # delete nan samples
            epsilons = np.delete(epsilons, nan_samples, axis=1)
            r_plus = np.delete(r_plus, nan_samples, axis=0)
            r_minus = np.delete(r_minus, nan_samples, axis=0)

            if epsilons.shape[1] > 0:
                pepg.update_parameters(epsilons, r_plus, r_minus)
                episode += 1

                # Try parameters after this episode --------------------------------------
                actor.set_params(pepg.get_parameters())
                earthworm = Earthworm(config.somites,
                                      config.oscillators_list,
                                      config.earthworm_params)
                locomotion_distance = utils.locomotion_distance_logger(
                    earthworm)
                (accumulated_tension, ) = exec_steps(config.params["steps"],
                                                     actor,
                                                     earthworm, [],
                                                     episode=episode - 1)
                d = locomotion_distance(earthworm)
                reward = d

                # Save parameter performance
                distance_log.append_data(episode, d)
                sigma_log.append_data(episode, np.mean(pepg.get_sigmas()))
                reward_log.append_data(episode, reward)
                announce = " --- Distance: {} Reward: {}".format(d, reward)
                print(announce)
            else:
                # Every sampled pair produced NaN; skip the update and retry
                # the same episode number.
                print("got nan position. update failed")
        except KeyboardInterrupt:
            # Interactive escape hatch: sample current params or stop training.
            command = input("\nSample? Finish? : ")
            if command in ["sample", "Sample"]:
                actor.set_params(pepg.get_parameters())
                test_current_params(actor, save_dir.log_dir(), episode)
                continue
            if command in ["finish", "Finish"]:
                print("Ending training ...")
                break

    actor.set_params(pepg.get_parameters())
    actor.save(os.path.join(save_dir.model_dir(), 'actor_model.pickle'))
    return actor
def extract_earthworm_position(earthworm: Earthworm):
    """Return the x coordinate of the earthworm's middle somite.

    Used as a single scalar proxy for the worm's position along the axis
    of locomotion.
    """
    positions = earthworm.somite_positions()
    # Floor division replaces int(np.floor(len / 2)): identical result for
    # non-negative lengths, without a float round-trip through numpy.
    middle = len(positions) // 2
    # index = len(positions) - 1
    return positions[middle][0]