Example #1
import numpy as np


def build_long_term_target(env: Environment, dt, N):
    # Build N look-ahead target points along the track, spaced by the distance
    # the car covers in one time step at the desired velocity.
    ind = env.get_current_waypoint_index()
    track = env.get_track()

    pos = env.car.get_position()

    desired_v = 80
    dist_travel = desired_v * dt

    def get_point(start, end, d_to_go):
        # Return the point d_to_go along the direction from start toward end.
        x0, y0 = start
        x1, y1 = end
        dy = y1 - y0
        dx = x1 - x0
        d = np.linalg.norm((dx, dy))

        x = x0 + d_to_go * dx / d
        y = y0 + d_to_go * dy / d

        return np.array((x, y))

    cur_pos = np.array(pos)
    ind = ind % len(track)
    cur_target = np.array(track[ind][2:4])

    result = [pos]
    for i in range(N - 1):
        remain_dist = np.linalg.norm(cur_target - cur_pos) - dist_travel
        if remain_dist > 0:
            p = get_point(cur_pos, cur_target, dist_travel)
            result.append(p)
            cur_pos = p
        else:
            # must ensure distance between 2 target points larger than dist_travel
            cur_pos = cur_target
            ind = (ind + 1) % len(track)
            cur_target = np.array(track[ind][2:4])

            p = get_point(cur_pos, cur_target, -remain_dist)
            result.append(p)
            cur_pos = p

    return result
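The helper above depends on the project's Environment and track data. Below is a minimal, self-contained sketch of the same polyline-interpolation idea on a toy track, runnable on its own; `interpolate_targets`, `toy_track`, and the constants are illustrative names, not part of the original code.

# Standalone sketch of the waypoint interpolation above, using a toy track.
import numpy as np

def interpolate_targets(track_points, start, step_dist, N):
    # Walk along the polyline `track_points`, emitting N points spaced
    # `step_dist` apart, starting from `start`.
    result = [np.asarray(start, dtype=float)]
    ind = 0
    cur_pos = result[0]
    cur_target = np.asarray(track_points[ind], dtype=float)
    for _ in range(N - 1):
        remain = np.linalg.norm(cur_target - cur_pos) - step_dist
        if remain <= 0:
            # step over the waypoint onto the next segment
            cur_pos = cur_target
            ind = (ind + 1) % len(track_points)
            cur_target = np.asarray(track_points[ind], dtype=float)
            to_go = -remain
        else:
            to_go = step_dist
        direction = (cur_target - cur_pos) / np.linalg.norm(cur_target - cur_pos)
        cur_pos = cur_pos + direction * to_go
        result.append(cur_pos)
    return result

toy_track = [(10.0, 0.0), (10.0, 10.0), (0.0, 10.0), (0.0, 0.0)]
print(interpolate_targets(toy_track, start=(0.0, 0.0), step_dist=4.0, N=6))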
Example #2
    def initialize(self, config):
        self.config = config

        # initialize agent
        self.agent = Agent(critic_arch=self.critic_arch,
                           actor_arch=self.actor_arch,
                           critic_hidden_size=self.config['CRITIC_SIZE'],
                           actor_hidden_size=self.config['ACTOR_SIZE'],
                           action_size=self.config['ACTION_SIZE'],
                           memory_size=self.config['MEMORY_SIZE'],
                           critic_learning_rate=self.config['CRITIC_LR'],
                           actor_learning_rate=self.config['ACTOR_LR'],
                           gamma=self.config['GAMMA'],
                           tau=self.config['TAU'])

        # initialize environment
        self.env = Environment(agent=self.agent,
                               classifier=self.classifier,
                               img_dir=self.config['IMG_PATH'],
                               img_size=self.config['IMG_SIZE'],
                               blob_size=self.config['BLOB_SIZE'],
                               done_threshold=self.config['DONE_THRESHOLD'])

        self.noise = OUNoise(self.config['ACTION_SIZE'])
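The keys read from self.config above imply a flat dictionary. A hypothetical configuration that would satisfy these lookups is sketched below; every value is a placeholder chosen for illustration, not a value from the original project.

# Hypothetical configuration dict covering the keys used above; placeholder values only.
config = {
    'CRITIC_SIZE': 256,
    'ACTOR_SIZE': 256,
    'ACTION_SIZE': 2,
    'MEMORY_SIZE': 100_000,
    'CRITIC_LR': 1e-3,
    'ACTOR_LR': 1e-4,
    'GAMMA': 0.99,
    'TAU': 1e-2,
    'IMG_PATH': 'data/images',
    'IMG_SIZE': 224,
    'BLOB_SIZE': 32,
    'DONE_THRESHOLD': 0.5,
}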
Example #3
def main():
    """

    :return:
    """
    setup_logging()
    op = OptionParser("Usage: %prog [options] " "(tasks_config.json | tasks_config.py)")
    op.add_option('-o', '--output', default='results.out', help='File where the simulation results are saved.')
    op.add_option('--scramble', action='store_true', default=False,
                  help='Randomly scramble the words in the tasks for ' 'a human player.')
    op.add_option('-w', '--show-world', action='store_true', default=False,
                  help='shows a visualization of the world in the console ' '(mainly for debugging)')
    op.add_option('-d', '--time-delay', default=0, type=float,
                  help='adds some delay between each timestep for easier' ' visualization.')
    op.add_option('-l', '--learner', default='learners.human_learner.HumanLearner',
                  help='Defines the type of learner.')
    op.add_option('-v', '--view', default='BaseView', help='Viewing mode.')
    op.add_option('-s', '--serializer', default='core.serializer.StandardSerializer',
                  help='Sets the encoding of characters into bits')
    op.add_option('--learner-cmd', help='The cmd to run to launch RemoteLearner.')
    op.add_option('--learner-port', default=5556, type=int, help='Port on which to accept remote learner.')
    op.add_option('--max-reward-per-task', default=10, type=int,
                  help='Maximum reward that we can give to a learner for' ' a given task.')
    opt, args = op.parse_args()
    if len(args) == 0:
        op.error("Tasks schedule configuration file required.")
    tasks_config_file = args[0]  # Retrieve task configuration
    logger = logging.getLogger(__name__)
    logger.info("Starting new evaluation session")
    serializer = create_serializer(opt.serializer)  # Set how the environment produces and interprets the bit signal
    learner = create_learner(opt.learner, serializer, opt.learner_cmd, opt.learner_port)  # Create learner
    task_scheduler = create_tasks_from_config(tasks_config_file)  # Create tasks, add to scheduler to be served
    env = Environment(serializer, task_scheduler, opt.scramble, opt.max_reward_per_task) # Construct environment
    session = Session(env, learner, opt.time_delay)  # a learning session
    view = create_view(opt.view, opt.learner, env, session, serializer, opt.show_world)  # setup view
    try:
        learner.set_view(view)  # Send interface to human learner
    except AttributeError:  # not human. pass
        pass
    try:
        view.initialize()  # talk
        session.run()
    except BaseException:
        view.finalize()
        save_results(session, opt.output)
        raise
    else:
        view.finalize()
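create_serializer, create_learner and create_view receive dotted class paths such as 'core.serializer.StandardSerializer'. A minimal sketch of how such a factory could resolve a path into a class is shown below; this is an assumption about those helpers, not their actual implementation.

# A minimal sketch, assuming the create_* helpers simply import a class by its
# dotted path; the name load_by_dotted_path is illustrative.
import importlib

def load_by_dotted_path(path):
    module_name, _, class_name = path.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

# e.g. load_by_dotted_path('core.serializer.StandardSerializer')()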
Example #4
def main():
    env = gym.make('CarRacing-v0')
    env = Environment(env=env, FPS=50.0)

    done = False

    env.reset()
    # car = env.unwrapped.car
    # w = car.wheels[0]
    dt = 1 / FPS
    prev_a = MAX_a
    prev_steer = 0

    total_reward = 0
    # ego_car = Car(car, prev_a, prev_steer)

    while not done:
        print('########################')

        long_term_xs, long_term_ys = env.calc_long_term_targets()

        a, steer, x, y = long_term_MPC(env.car,
                                       list(zip(long_term_xs, long_term_ys)),
                                       dt)

        short_term_N = 5
        short_term_target = list(zip(x, y))[:short_term_N]
        a, steer, x, y = short_term_MPC(env.car, short_term_target, dt)

        print(a, steer, env.car.get_velocity())

        a = a / MAX_a  # normalize acceleration by its maximum value
        if a > 0:
            action = Action(steer, a / 10, 0)
        else:
            action = Action(steer, 0, -a)

        _, r, done, _ = env.step(action)
        env.car.take_control(action)
        total_reward += r

        env.render()

    print(total_reward)
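The loop above maps a signed, normalized acceleration onto the separate gas and brake channels of the action. A standalone sketch of that split is shown below; `to_gas_brake` is an illustrative name, and the divisor 10 mirrors the original code.

# Standalone sketch of the gas/brake split used in the loop above.
def to_gas_brake(a_normalized):
    if a_normalized > 0:
        return a_normalized / 10, 0.0   # accelerate: small gas, no brake
    return 0.0, -a_normalized           # decelerate: no gas, brake proportional

print(to_gas_brake(0.8))   # (0.08, 0.0)
print(to_gas_brake(-0.4))  # (0.0, 0.4)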
Example #5
def process_world(conn, opt, tasks_config_file, world_id):
    try:
        serializer = StandardSerializer()
        task_scheduler = create_tasks_from_config(tasks_config_file)
        env = Environment(serializer, task_scheduler, opt.scramble,
                          opt.max_reward_per_task, not opt.bit_mode)
        learner = create_learner(opt.learner, serializer, opt.learner_cmd,
                                 opt.learner_port, not opt.bit_mode)
        session = Session(env, learner, opt.time_delay)

        args = conn.recv()
        while args is not None:
            episode_id, step_count, seed, weight = args
            # INTERACTION BETWEEN ENVIRONMENT AND AGENT
            learner.net.set_genotype_weight(weight, seed)
            del weight
            episode_reward = session.iterate_n(step_count)
            # save_results(session, opt.output)
            conn.send((episode_reward, seed))
            args = conn.recv()
    except BaseException as e:
        print(e)
        conn.send(None)
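The worker above expects a simple request/reply protocol over a multiprocessing pipe: it receives (episode_id, step_count, seed, weight) tuples, replies with (episode_reward, seed), and stops on None. A hedged sketch of the parent side, assuming process_world is launched as a separate process, could look like this; run_worker, jobs and opt are illustrative names.

# Sketch of the parent-side protocol implied by the worker loop above.
import multiprocessing as mp

def run_worker(opt, tasks_config_file, jobs):
    parent_conn, child_conn = mp.Pipe()
    proc = mp.Process(target=process_world,
                      args=(child_conn, opt, tasks_config_file, 0))
    proc.start()
    results = []
    for episode_id, (step_count, seed, weight) in enumerate(jobs):
        parent_conn.send((episode_id, step_count, seed, weight))
        reply = parent_conn.recv()      # (episode_reward, seed), or None on worker error
        if reply is None:
            break
        results.append(reply)
    else:
        parent_conn.send(None)          # normal end: tell the worker to stop
    proc.join()
    return results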
Example #6
__author__ = 'aldnav'

from core.environment import Environment


if __name__ == '__main__':
    env = Environment()
    env.simulate()
Example #7
def main():
    setup_logging()
    op = OptionParser("Usage: %prog [options] "
                      "(tasks_config.json | tasks_config.py)")
    op.add_option('-o',
                  '--output',
                  default='results.out',
                  help='File where the simulation results are saved.')
    op.add_option('--scramble',
                  action='store_true',
                  default=False,
                  help='Randomly scramble the words in the tasks for '
                  'a human player.')
    op.add_option('-w',
                  '--show-world',
                  action='store_true',
                  default=False,
                  help='shows a visualization of the world in the console '
                  '(mainly for debugging)')
    op.add_option('-d',
                  '--time-delay',
                  default=0,
                  type=float,
                  help='adds some delay between each timestep for easier'
                  ' visualization.')
    op.add_option('-l',
                  '--learner',
                  default='learners.human_learner.HumanLearner',
                  help='Defines the type of learner.')
    op.add_option('-v', '--view', default='BaseView', help='Viewing mode.')
    op.add_option('--learner-cmd',
                  help='The cmd to run to launch RemoteLearner.')
    op.add_option('--learner-port',
                  default=5556,
                  type=int,
                  help='Port on which to accept remote learner.')
    op.add_option('--learner-address',
                  help='Network address on which the remote learner listens.')
    op.add_option('--max-reward-per-task',
                  default=2147483647,
                  type=int,
                  help='Maximum reward that we can give to a learner for'
                  ' a given task.')
    op.add_option('--curses',
                  action='store_true',
                  default=False,
                  help='Uses standard output instead of curses library.')
    op.add_option('--bit-mode',
                  action='store_true',
                  default=False,
                  help='Environment receives input in bytes.')
    opt, args = op.parse_args()
    if len(args) == 0:
        op.error("Tasks schedule configuration file required.")
    # retrieve the task configuration file
    tasks_config_file = args[0]
    logger = logging.getLogger(__name__)
    logger.info("Starting new evaluation session")
    # we choose how the environment will produce and interpret
    # the bit signal
    serializer = StandardSerializer()
    # create a learner (the human learner takes the serializer)
    learner = create_learner(opt.learner, serializer, opt.learner_cmd,
                             opt.learner_port, opt.learner_address,
                             not opt.bit_mode)
    # create our tasks and put them into a scheduler to serve them
    task_scheduler = create_tasks_from_config(tasks_config_file)
    # construct an environment
    env = Environment(serializer, task_scheduler, opt.scramble,
                      opt.max_reward_per_task, not opt.bit_mode)
    # a learning session
    session = Session(env, learner, opt.time_delay)
    # setup view
    view = create_view(opt.view, opt.learner, env, session, serializer,
                       opt.show_world, opt.curses, not opt.bit_mode)
    try:
        # send the interface to the human learner
        learner.set_view(view)
    except AttributeError:
        # this was not a human learner, nothing to do
        pass
    try:
        view.initialize()
        # ok guys, talk
        session.run()
    except BaseException:
        view.finalize()
        save_results(session, opt.output)
        raise
    else:
        view.finalize()
Example #8
class Trainer:
    def __init__(self, config, critic_arch, actor_arch, classifier):
        self.config = config
        self.critic_arch, self.actor_arch, self.classifier = critic_arch, actor_arch, classifier

        self.initialize(self.config)

    def initialize(self, config):
        self.config = config

        # initialize agent
        self.agent = Agent(critic_arch=self.critic_arch,
                           actor_arch=self.actor_arch,
                           critic_hidden_size=self.config['CRITIC_SIZE'],
                           actor_hidden_size=self.config['ACTOR_SIZE'],
                           action_size=self.config['ACTION_SIZE'],
                           memory_size=self.config['MEMORY_SIZE'],
                           critic_learning_rate=self.config['CRITIC_LR'],
                           actor_learning_rate=self.config['ACTOR_LR'],
                           gamma=self.config['GAMMA'],
                           tau=self.config['TAU'])

        # initialize environment
        self.env = Environment(agent=self.agent,
                               classifier=self.classifier,
                               img_dir=self.config['IMG_PATH'],
                               img_size=self.config['IMG_SIZE'],
                               blob_size=self.config['BLOB_SIZE'],
                               done_threshold=self.config['DONE_THRESHOLD'])

        self.noise = OUNoise(self.config['ACTION_SIZE'])

    def run(self):
        batch_size = self.config['BATCH_SIZE']
        rewards = []
        avg_rewards = []

        for episode in range(self.config['N_EPISODES']):
            state = self.env.reset()
            self.noise.reset()
            episode_reward = 0

            bar = progress_bar(range(self.config['MAX_LENGTH_EPISODE']))
            for step in bar:
                action = self.agent.get_action(state)
                action = self.noise.get_action(action=action, t=step)
                new_state, reward, done, info = self.env.step(action)
                self.agent.memory.push(state, action, reward, new_state, done)

                if len(self.agent.memory) > batch_size:
                    self.agent.update(batch_size)

                state = new_state
                episode_reward += reward

                bar.comment = f"reward: {reward:.3f} - episode_reward: {episode_reward:.3f}"

                if done:
                    print(
                        f"Done episode {episode:<3} with reward of: {episode_reward:.2f}, avg reward: {np.mean(rewards)}"
                    )
                    break

            rewards.append(episode_reward)
            avg_rewards.append(np.mean(rewards))
        return rewards, avg_rewards

    def save(self, name=None):
        """
        specify `name` to override experiment name in config
        """
        name = name or self.config['NAME']
        state_dict = {
            'critic': self.agent.critic.state_dict(),
            'actor': self.agent.actor.state_dict(),
            'critic_target': self.agent.critic_target.state_dict(),
            'actor_target': self.agent.actor_target.state_dict()
        }

        torch.save(state_dict, f"{self.config['WEIGHT_PATH']}/{name}.pth")

    def load(self, name=None):
        """
        specify `name` to override experiment name in config
        """
        name = name or self.config['NAME']
        state_dict = torch.load(f"{self.config['WEIGHT_PATH']}/{name}.pth")

        self.agent.critic.load_state_dict(state_dict['critic'])
        self.agent.actor.load_state_dict(state_dict['actor'])
        self.agent.critic_target.load_state_dict(state_dict['critic_target'])
        self.agent.actor_target.load_state_dict(state_dict['actor_target'])
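A hypothetical driver for the Trainer above, assuming the placeholder config dict sketched after Example #2 plus the extra keys read by run(), save() and load(); critic_arch, actor_arch and classifier stand in for the project's own network definitions and classifier.

# Hypothetical usage, not part of the original project.
config.update({
    'BATCH_SIZE': 64,
    'N_EPISODES': 100,
    'MAX_LENGTH_EPISODE': 200,
    'NAME': 'ddpg_baseline',
    'WEIGHT_PATH': 'weights',
})

trainer = Trainer(config, critic_arch, actor_arch, classifier)
rewards, avg_rewards = trainer.run()
trainer.save()        # writes weights/ddpg_baseline.pth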
Example #9
    def create_base_environment(self):
        env = Environment()
        # env.add_primitives(self.MATH_BINDINGS)
        env.add_primitives(self.OP_BINDINGS)
        env.add_primitives({'exit': (self.exit_function(), 'Void -> Void')})
        return env
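The bindings passed to add_primitives map a name to a (value, type-signature string) pair, as the 'exit' entry shows. A purely hypothetical table in the same shape might look like the following; the operators and signature strings are guesses, not the project's actual OP_BINDINGS.

# Hypothetical bindings in the same (value, 'signature') shape as the 'exit'
# entry above; operators and signature strings are illustrative guesses only.
import operator

OP_BINDINGS = {
    '+': (operator.add, 'Int -> Int -> Int'),
    '-': (operator.sub, 'Int -> Int -> Int'),
    '*': (operator.mul, 'Int -> Int -> Int'),
}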