import numpy as np


def build_long_term_target(env: Environment, dt, N):
    """Build N target points spaced by the distance covered per timestep."""
    ind = env.get_current_waypoint_index()
    track = env.get_track()
    pos = env.car.get_position()
    desired_v = 80
    dist_travel = desired_v * dt  # distance covered in one timestep at the desired speed

    def get_point(start, end, d_to_go):
        # Walk d_to_go units from `start` along the segment towards `end`.
        x0, y0 = start
        x1, y1 = end
        dy = y1 - y0
        dx = x1 - x0
        d = np.linalg.norm((dx, dy))
        x = x0 + d_to_go * dx / d
        y = y0 + d_to_go * dy / d
        return np.array((x, y))

    cur_pos = np.array(pos)
    ind = ind % len(track)
    cur_target = np.array(track[ind][2:4])
    result = [pos]
    for i in range(N - 1):
        remain_dist = np.linalg.norm(cur_target - cur_pos) - dist_travel
        if remain_dist > 0:
            # The next point still lies on the current segment.
            p = get_point(cur_pos, cur_target, dist_travel)
            result.append(p)
            cur_pos = p
        else:
            # Overshoot: jump to the waypoint and walk the leftover distance
            # along the next segment. This assumes consecutive waypoints are
            # farther apart than dist_travel.
            cur_pos = cur_target
            ind = (ind + 1) % len(track)
            cur_target = np.array(track[ind][2:4])
            p = get_point(cur_pos, cur_target, -remain_dist)
            result.append(p)
            cur_pos = p
    return result
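# A minimal sanity check of the segment interpolation above, assuming only
# numpy (the Environment and track layout are project-specific): walking a
# positive distance moves toward the endpoint, and the overshoot branch
# continues the leftover distance along the next segment.
import numpy as np

def get_point(start, end, d_to_go):
    start, end = np.asarray(start, dtype=float), np.asarray(end, dtype=float)
    direction = end - start
    return start + d_to_go * direction / np.linalg.norm(direction)

print(get_point((0, 0), (10, 0), 3.0))   # [3. 0.]: 3 units along the segment
# Overshooting the waypoint at (10, 0) by 2 units continues up the next segment:
print(get_point((10, 0), (10, 5), 2.0))  # [10.  2.]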
def main(): """ :return: """ setup_logging() op = OptionParser("Usage: %prog [options] " "(tasks_config.json | tasks_config.py)") op.add_option('-o', '--output', default='results.out', help='File where the simulation results are saved.') op.add_option('--scramble', action='store_true', default=False, help='Randomly scramble the words in the tasks for ' 'a human player.') op.add_option('-w', '--show-world', action='store_true', default=False, help='shows a visualization of the world in the console ' '(mainly for debugging)') op.add_option('-d', '--time-delay', default=0, type=float, help='adds some delay between each timestep for easier' ' visualization.') op.add_option('-l', '--learner', default='learners.human_learner.HumanLearner', help='Defines the type of learner.') op.add_option('-v', '--view', default='BaseView', help='Viewing mode.') op.add_option('-s', '--serializer', default='core.serializer.StandardSerializer', help='Sets the encoding of characters into bits') op.add_option('--learner-cmd', help='The cmd to run to launch RemoteLearner.') op.add_option('--learner-port', default=5556, help='Port on which to accept remote learner.') op.add_option('--max-reward-per-task', default=10, type=int, help='Maximum reward that we can give to a learner for' ' a given task.') opt, args = op.parse_args() if len(args) == 0: op.error("Tasks schedule configuration file required.") tasks_config_file = args[0] # Retrieve task configuration logger = logging.getLogger(__name__) logger.info("Starting new evaluation session") serializer = create_serializer(opt.serializer) # Set hoe enviroment produces and interprets a bit signal learner = create_learner(opt.learner, serializer, opt.learner_cmd, opt.learner_port) # Create learner task_scheduler = create_tasks_from_config(tasks_config_file) # Create tasks, add to scheduler to be served env = Environment(serializer, task_scheduler, opt.scramble, opt.max_reward_per_task) # Construct environment session = Session(env, learner, opt.time_delay) # a learning session view = create_view(opt.view, opt.learner, env, session, serializer, opt.show_world) # setup view try: learner.set_view(view) # Send interface to human learner except AttributeError: # not human. pass pass try: view.initialize() # talk session.run() except BaseException: view.finalize() save_results(session, opt.output) raise else: view.finalize()
import gym
# Environment, Action, long_term_MPC, short_term_MPC, FPS, and MAX_a are
# assumed to be imported from the surrounding project.


def main():
    env = gym.make('CarRacing-v0')
    env = Environment(env=env, FPS=50.0)
    done = False
    env.reset()
    dt = 1 / FPS
    total_reward = 0
    while not done:
        print('########################')
        # Plan with a long-term MPC over the waypoint targets, then refine
        # the first few predicted points with a short-term MPC.
        long_term_xs, long_term_ys = env.calc_long_term_targets()
        a, steer, x, y = long_term_MPC(env.car,
                                       list(zip(long_term_xs, long_term_ys)),
                                       dt)
        short_term_N = 5
        short_term_target = list(zip(x, y))[:short_term_N]
        a, steer, x, y = short_term_MPC(env.car, short_term_target, dt)
        print(a, steer, env.car.get_velocity())
        # Map the signed acceleration onto (gas, brake).
        a = a / MAX_a
        if a > 0:
            action = Action(steer, a / 10, 0)
        else:
            action = Action(steer, 0, -a)
        _, r, done, _ = env.step(action)
        env.car.take_control(action)
        total_reward += r
        env.render()
    print(total_reward)
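# The gas/brake mapping at the end of the loop above, isolated as a hedged
# sketch; this Action namedtuple is a hypothetical stand-in for the project's
# Action(steer, gas, brake) type.
from collections import namedtuple

Action = namedtuple('Action', ['steer', 'gas', 'brake'])

def to_action(a, steer, max_a=1.0):
    """Map a signed MPC acceleration onto a (gas, brake) pair."""
    a = a / max_a                        # normalize to roughly [-1, 1]
    if a > 0:
        return Action(steer, a / 10, 0)  # accelerate: scaled-down gas, no brake
    return Action(steer, 0, -a)          # decelerate: brake proportional to |a|

print(to_action(0.5, steer=0.1))   # Action(steer=0.1, gas=0.05, brake=0)
print(to_action(-0.8, steer=0.0))  # Action(steer=0.0, gas=0, brake=0.8)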
def process_world(conn, opt, tasks_config_file, world_id):
    """Worker loop: evaluate genotype weights received over `conn` until a
    None sentinel arrives, replying with the reward of each episode."""
    try:
        serializer = StandardSerializer()
        task_scheduler = create_tasks_from_config(tasks_config_file)
        env = Environment(serializer, task_scheduler, opt.scramble,
                          opt.max_reward_per_task, not opt.bit_mode)
        learner = create_learner(opt.learner, serializer, opt.learner_cmd,
                                 opt.learner_port, not opt.bit_mode)
        session = Session(env, learner, opt.time_delay)
        args = conn.recv()
        while args is not None:
            episode_id, step_count, seed, weight = args
            # Interaction between the environment and the agent.
            learner.net.set_genotype_weight(weight, seed)
            del weight
            episode_reward = session.iterate_n(step_count)
            # save_results(session, opt.output)
            conn.send((episode_reward, seed))
            args = conn.recv()
    except BaseException as e:
        print(e)
        conn.send(None)
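# A hedged sketch of the parent side of the pipe protocol above, assuming the
# worker runs in a multiprocessing.Process; opt, tasks_config_file, world_id,
# jobs, and step_count are hypothetical placeholders that the surrounding
# program would supply.
from multiprocessing import Pipe, Process

parent_conn, child_conn = Pipe()
worker = Process(target=process_world,
                 args=(child_conn, opt, tasks_config_file, world_id))
worker.start()
for episode_id, (seed, weight) in enumerate(jobs):  # jobs: (seed, weight) pairs
    parent_conn.send((episode_id, step_count, seed, weight))
    reply = parent_conn.recv()  # (episode_reward, seed), or None if the worker failed
parent_conn.send(None)          # sentinel: tell the worker loop to exit
worker.join()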
__author__ = 'aldnav'

from core.environment import Environment

if __name__ == '__main__':
    env = Environment()
    env.simulate()
import logging
from optparse import OptionParser
# setup_logging, create_learner, create_tasks_from_config, create_view,
# save_results, StandardSerializer, Environment, and Session are assumed
# to be imported from the surrounding project.


def main():
    setup_logging()
    op = OptionParser("Usage: %prog [options] "
                      "(tasks_config.json | tasks_config.py)")
    op.add_option('-o', '--output', default='results.out',
                  help='File where the simulation results are saved.')
    op.add_option('--scramble', action='store_true', default=False,
                  help='Randomly scramble the words in the tasks for '
                       'a human player.')
    op.add_option('-w', '--show-world', action='store_true', default=False,
                  help='Shows a visualization of the world in the console '
                       '(mainly for debugging).')
    op.add_option('-d', '--time-delay', default=0, type=float,
                  help='Adds some delay between each timestep for easier '
                       'visualization.')
    op.add_option('-l', '--learner',
                  default='learners.human_learner.HumanLearner',
                  help='Defines the type of learner.')
    op.add_option('-v', '--view', default='BaseView',
                  help='Viewing mode.')
    op.add_option('--learner-cmd',
                  help='The command to run to launch a RemoteLearner.')
    op.add_option('--learner-port', default=5556, type=int,
                  help='Port on which to accept a remote learner.')
    op.add_option('--learner-address',
                  help='Network address on which the remote learner listens.')
    op.add_option('--max-reward-per-task', default=2147483647, type=int,
                  help='Maximum reward that we can give to a learner for '
                       'a given task.')
    op.add_option('--curses', action='store_true', default=False,
                  help='Uses the curses library instead of standard output.')
    op.add_option('--bit-mode', action='store_true', default=False,
                  help='Environment receives input in bits.')
    opt, args = op.parse_args()
    if len(args) == 0:
        op.error("Tasks schedule configuration file required.")
    # Retrieve the task configuration file.
    tasks_config_file = args[0]

    logger = logging.getLogger(__name__)
    logger.info("Starting new evaluation session")
    # Choose how the environment will produce and interpret the bit signal.
    serializer = StandardSerializer()
    # Create a learner (the human learner takes the serializer).
    learner = create_learner(opt.learner, serializer, opt.learner_cmd,
                             opt.learner_port, opt.learner_address,
                             not opt.bit_mode)
    # Create the tasks and put them into a scheduler that serves them.
    task_scheduler = create_tasks_from_config(tasks_config_file)
    # Construct the environment.
    env = Environment(serializer, task_scheduler, opt.scramble,
                      opt.max_reward_per_task, not opt.bit_mode)
    # A learning session.
    session = Session(env, learner, opt.time_delay)
    # Set up the view.
    view = create_view(opt.view, opt.learner, env, session, serializer,
                       opt.show_world, opt.curses, not opt.bit_mode)
    try:
        # Send the interface to the human learner.
        learner.set_view(view)
    except AttributeError:
        # This was not a human learner; nothing to do.
        pass

    try:
        view.initialize()
        # OK, talk.
        session.run()
    except BaseException:
        view.finalize()
        save_results(session, opt.output)
        raise
    else:
        view.finalize()
import numpy as np
import torch
# Agent, Environment, OUNoise, and progress_bar are assumed to be imported
# from the surrounding project.


class Trainer:
    def __init__(self, config, critic_arch, actor_arch, classifier):
        self.config = config
        self.critic_arch, self.actor_arch, self.classifier = \
            critic_arch, actor_arch, classifier
        self.initialize(self.config)

    def initialize(self, config):
        self.config = config
        # Initialize the agent.
        self.agent = Agent(critic_arch=self.critic_arch,
                           actor_arch=self.actor_arch,
                           critic_hidden_size=self.config['CRITIC_SIZE'],
                           actor_hidden_size=self.config['ACTOR_SIZE'],
                           action_size=self.config['ACTION_SIZE'],
                           memory_size=self.config['MEMORY_SIZE'],
                           critic_learning_rate=self.config['CRITIC_LR'],
                           actor_learning_rate=self.config['ACTOR_LR'],
                           gamma=self.config['GAMMA'],
                           tau=self.config['TAU'])
        # Initialize the environment.
        self.env = Environment(agent=self.agent,
                               classifier=self.classifier,
                               img_dir=self.config['IMG_PATH'],
                               img_size=self.config['IMG_SIZE'],
                               blob_size=self.config['BLOB_SIZE'],
                               done_threshold=self.config['DONE_THRESHOLD'])
        self.noise = OUNoise(self.config['ACTION_SIZE'])

    def run(self):
        batch_size = self.config['BATCH_SIZE']
        rewards = []
        avg_rewards = []
        for episode in range(self.config['N_EPISODES']):
            state = self.env.reset()
            self.noise.reset()
            episode_reward = 0
            bar = progress_bar(range(self.config['MAX_LENGTH_EPISODE']))
            for step in bar:
                # Exploratory action: actor output plus OU noise.
                action = self.agent.get_action(state)
                action = self.noise.get_action(action=action, t=step)
                new_state, reward, done, info = self.env.step(action)
                self.agent.memory.push(state, action, reward, new_state, done)
                # Update once the replay buffer can fill a batch.
                if len(self.agent.memory) > batch_size:
                    self.agent.update(batch_size)
                state = new_state
                episode_reward += reward
                bar.comment = (f"reward: {reward:.3f} - "
                               f"episode_reward: {episode_reward:.3f}")
                if done:
                    print(f"Done episode {episode:<3} with reward of: "
                          f"{episode_reward:.2f}, avg reward: {np.mean(rewards)}")
                    break
            rewards.append(episode_reward)
            avg_rewards.append(np.mean(rewards))
        return rewards, avg_rewards

    def save(self, name=None):
        """Specify `name` to override the experiment name in the config."""
        name = name or self.config['NAME']
        state_dict = {
            'critic': self.agent.critic.state_dict(),
            'actor': self.agent.actor.state_dict(),
            'critic_target': self.agent.critic_target.state_dict(),
            'actor_target': self.agent.actor_target.state_dict()
        }
        torch.save(state_dict, f"{self.config['WEIGHT_PATH']}/{name}.pth")

    def load(self, name=None):
        """Specify `name` to override the experiment name in the config."""
        name = name or self.config['NAME']
        state_dict = torch.load(f"{self.config['WEIGHT_PATH']}/{name}.pth")
        self.agent.critic.load_state_dict(state_dict['critic'])
        self.agent.actor.load_state_dict(state_dict['actor'])
        self.agent.critic_target.load_state_dict(state_dict['critic_target'])
        self.agent.actor_target.load_state_dict(state_dict['actor_target'])
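# A hedged usage sketch of the Trainer above; the config keys mirror the ones
# initialize() and run() read, but every value here is illustrative, and
# critic_arch, actor_arch, and classifier are project-specific objects.
config = {
    'CRITIC_SIZE': 256, 'ACTOR_SIZE': 256, 'ACTION_SIZE': 2,
    'MEMORY_SIZE': 50000, 'CRITIC_LR': 1e-3, 'ACTOR_LR': 1e-4,
    'GAMMA': 0.99, 'TAU': 1e-2,
    'IMG_PATH': 'data/images', 'IMG_SIZE': 84, 'BLOB_SIZE': 16,
    'DONE_THRESHOLD': 0.9,
    'BATCH_SIZE': 64, 'N_EPISODES': 100, 'MAX_LENGTH_EPISODE': 200,
    'NAME': 'baseline', 'WEIGHT_PATH': 'weights',
}

trainer = Trainer(config, critic_arch, actor_arch, classifier)
rewards, avg_rewards = trainer.run()
trainer.save()  # writes weights/baseline.pth
trainer.load()  # restores the same checkpoint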
def create_base_environment(self):
    env = Environment()
    # env.add_primitives(self.MATH_BINDINGS)
    env.add_primitives(self.OP_BINDINGS)
    env.add_primitives({'exit': (self.exit_function(), 'Void -> Void')})
    return env