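# The class below relies on names from the AIDO protocol template. A hedged
# reconstruction of the imports it needs (the aido_schemas names match the
# official template; the project-local module paths are assumptions):
import time

import numpy as np
from aido_schemas import (Context, Duckiebot1Commands, Duckiebot1Observations,
                          EpisodeStart, JPGImage, LEDSCommands, PWMCommands,
                          RGB, logger)

from controller import Controller  # assumed local module
from model import Model            # assumed local module
from wrappers import DTPytorchWrapper, SteeringToWheelVelWrapper  # assumed local module
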
class PytorchAgent:
    def __init__(self, load_model=False, model_path=None):
        logger.info('PytorchAgent init')
        self.preprocessor = DTPytorchWrapper()

        self.model = Model()
        self.current_image = np.zeros((640, 480, 3))  # placeholder until the first camera frame

        self.steering_to_wheel_wrapper = SteeringToWheelVelWrapper()

        self.controller = Controller()
        self.dt = None
        self.last_t = None
        self.old_obs = None

        logger.info('PytorchAgent init complete')

    def init(self, context: Context):
        context.info('init()')

    def on_received_seed(self, data: int):
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: Duckiebot1Observations):
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        # self.current_image = self.preprocessor.preprocess(obs)
        self.current_image = obs

    def compute_action(self, observation):
        # the model predicts a pose estimate; index 1 is presumably a heading
        # normalized to [-1, 1], rescaled here to radians
        pose = self.model.predict(observation).detach().cpu().numpy()[0]
        pose[1] *= 3.1415
        time_now = time.time()
        if self.last_t is not None:
            self.dt = time_now - self.last_t
        v, omega = self.controller.compute_control_action(pose[0],
                                                          pose[1],
                                                          dt=self.dt)
        action = self.steering_to_wheel_wrapper.convert(np.array([v, omega]))
        self.last_t = time_now
        self.old_obs = observation
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        pwm_left, pwm_right = self.compute_action(self.current_image)

        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))

        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = Duckiebot1Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        context.info('finish()')
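
# The jpg2rgb helper used in on_received_observations is not shown above.
# A minimal sketch using Pillow, matching the usual AIDO template behavior
# (an assumption):
import io

import numpy as np
from PIL import Image

def jpg2rgb(image_data: bytes) -> np.ndarray:
    # decode JPEG bytes into an HxWx3 RGB uint8 array
    im = Image.open(io.BytesIO(image_data)).convert('RGB')
    return np.asarray(im)

# An agent node like this is typically started with the AIDO protocol runner;
# a hedged sketch:
from aido_schemas import protocol_agent_duckiebot1, wrap_direct

def main():
    node = PytorchAgent()
    wrap_direct(node=node, protocol=protocol_agent_duckiebot1)

if __name__ == '__main__':
    main()
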
def launch_env(id=None):
    env = None
    if id is None:
        from gym_duckietown.simulator import Simulator
        env = Simulator(
            seed=123,  # random seed
            map_name="loop_empty",
            max_steps=500001,  # we don't want the gym to reset itself
            domain_rand=False,
            camera_width=640,
            camera_height=480,
            accept_start_angle_deg=4,  # start close to straight
            full_transparency=True,
            distortion=True,
        )
    else:
        env = gym.make(id)

    # Wrappers
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
    env = SteeringToWheelVelWrapper(env)
    env = ActionWrapper(env)
    #env = DtRewardWrapper(env)

    return env
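
# A minimal sketch of exercising the environment returned by launch_env with
# random actions (standard Gym API; assumes `import gym` and the wrapper
# imports used by the surrounding examples):
env = launch_env()
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # stand-in for a trained policy
    obs, reward, done, info = env.step(action)
env.close()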
Example 4
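# This example is an excerpt; a hedged reconstruction of the imports it
# relies on (the ChallengeInterfaceSolution import path follows the old
# duckietown_challenges API and is an assumption):
import traceback

import gym
from duckietown_challenges import ChallengeInterfaceSolution
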
def solve(params, cis):
    # Python is dynamically typed; the assertion below helps IDEs with autocompletion
    assert isinstance(cis, ChallengeInterfaceSolution)
    # after this, "cis." will offer autocompletion in some IDEs (e.g. PyCharm)
    cis.info('Creating model.')
    # you can log through the solution interface (cis); the messages you log
    # can be retrieved from your submission's files.

    # We get environment from the Evaluation Engine
    cis.info('Making environment')
    env = gym.make(params['env'])

    # === BEGIN SUBMISSION ===

    # If you created custom wrappers, you also need to copy them into this folder.

    from wrappers import (NormalizeWrapper, ImgWrapper, ActionWrapper,
                          ResizeWrapper, SteeringToWheelVelWrapper)

    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    # to make the images pytorch-conv-compatible
    env = ImgWrapper(env)
    env = ActionWrapper(env)

    # you ONLY need this wrapper if you trained your policy on [speed, steering angle]
    # instead of [left wheel speed, right wheel speed]
    env = SteeringToWheelVelWrapper(env)

    # make sure you wrap at least the actions and observations the same way
    # as during training, so that your model receives the same kind of input
    # it was trained on (for example, if your model was trained on grayscale
    # images and you _don't_ convert to grayscale here, it won't work)

    # HERE YOU NEED TO CREATE THE POLICY NETWORK THE SAME WAY AS IN THE TRAINING CODE.
    # if you aren't using the DDPG baseline code, make sure to copy your model
    # into model.py and give it a model.predict(state) method.
    from model import DDPG

    model = DDPG(state_dim=env.observation_space.shape,
                 action_dim=2,
                 max_action=1,
                 net_type="cnn")

    try:
        model.load("model", "models")

        # === END SUBMISSION ===

        # Then we make sure we have a connection with the environment and it is ready to go
        cis.info('Reset environment')
        observation = env.reset()

        # While there is no signal of completion (simulation done),
        # we run predictions for a number of episodes; the Evaluation Engine controls this part
        while True:
            # we pass the observation to our model and get an action in return
            action = model.predict(observation)
            # we tell the environment to perform this action and we get some info back in OpenAI Gym style
            observation, reward, done, info = env.step(action)
            # here you may want to compute some stats, like how much reward you are getting
            # note: this reward may not be related to the challenge score.

            # it is important to check for this flag: the Evaluation Engine lets us
            # know when we should finish. if we are not careful with this, the
            # Evaluation Engine will kill our container and we will get no score
            # for this submission
            if 'simulation_done' in info:
                cis.info('simulation_done received.')
                break
            if done:
                cis.info('Episode done; calling reset()')
                env.reset()

    finally:
        # release CPU/GPU resources, let's be friendly with other users that may need them
        cis.info('Releasing resources')
        try:
            model.close()
        except Exception:
            msg = 'Could not call model.close():\n%s' % traceback.format_exc()
            cis.error(msg)
    cis.info('Graceful exit of solve()')
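
# A minimal stand-in for the model.py interface that solve() relies on
# (a hypothetical sketch; the DDPG baseline ships its own implementation):
import numpy as np

class DummyModel:
    """Hypothetical drop-in exposing the methods solve() calls."""

    def __init__(self, state_dim, action_dim=2, max_action=1, net_type="cnn"):
        self.action_dim = action_dim
        self.max_action = max_action

    def predict(self, state):
        # any policy with this signature works; here: constant forward motion
        return np.array([0.5, 0.0])

    def load(self, filename, directory):
        pass  # restore trained weights here

    def close(self):
        pass  # release CPU/GPU resources here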
Example 5
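# This excerpt starts mid-statement: the stray `args.seed)` line below is the
# tail of a truncated assignment and is left as-is. Note also that launch_env
# here takes seed/map_name kwargs, i.e. a different helper than the one
# defined earlier in this collection. The script assumes roughly these
# imports (module paths are assumptions):
from gym import wrappers

from env import launch_env  # assumed local module
from td3 import TD3         # assumed local module
from wrappers import (CropWrapper, ResizeWrapper, NormalizeWrapper,
                      ImgWrapper, SteeringToWheelVelWrapper)
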
                                 args.seed)

# Launch the env with our helper function
env = launch_env(seed=111, map_name=args.map_name)

# Wrappers
env = wrappers.Monitor(env,
                       './videos/test/' + file_name + '/',
                       force=True,
                       video_callable=lambda x: True)
env = CropWrapper(env)
env = ResizeWrapper(env)
#env = FrameStack(env, args.frame_stack_k)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
env = SteeringToWheelVelWrapper(env)
#env = SoftActionWrapper(env)
#env = ActionWrapper(env)
#env = DtRewardWrapper(env) # not during testing

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = TD3(state_dim,
             action_dim,
             max_action,
             net_type=args.net_type,
             args=args)
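
# A hedged sketch of evaluating the policy initialized above (assumes the
# baseline's policy.load(filename, directory) and policy.predict(obs) methods):
import numpy as np

policy.load(file_name, directory="./pytorch_models")

obs = env.reset()
done = False
episode_reward = 0.0
while not done:
    action = policy.predict(np.array(obs))
    obs, reward, done, info = env.step(action)
    episode_reward += reward
print("Episode reward:", episode_reward)
env.close()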
Example 6
    os.makedirs("./results")
if args.save_models and not os.path.exists("./pytorch_models"):
    os.makedirs("./pytorch_models")

# Launch the env with our helper function
env = launch_env(map_name=args.map_name)

time_str = time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time()))

# Wrappers
env = wrappers.Monitor(env, './videos/train/' + time_str + '/', force=True)
#env = ResizeWrapper(env)
#env = FrameStack(env, k = args.frame_stack_k)
#env = NormalizeWrapper(env)
#env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
env = SteeringToWheelVelWrapper(env)
#env = StableRewardWrapper(env)
#env = ActionWrapper(env)
#env = SoftActionWrapper(env)
env = DtRewardWrapper(env)

# Set seeds
seed(args.seed)

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = TD3(state_dim,
             action_dim,