class PytorchRLBaseline:
    """Agent that answers protocol callbacks with actions from a DDPG model.

    ``init`` builds the processors and the model and loads the saved model
    from ``./models``. Every received camera frame is preprocessed and cached;
    the cached frame is turned into wheel PWM commands on request.
    """

    def init(self, context: Context):
        """Create the processors and the DDPG model, then load the saved model."""
        context.info('init()')
        self.image_processor = DTPytorchWrapper()
        self.action_processor = ActionWrapper(FakeWrap())
        # The model module is imported lazily, right before the GPU check.
        from model import DDPG

        self.check_gpu_available(context)
        self.model = DDPG(
            state_dim=self.image_processor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        # Placeholder frame used until the first observation is received.
        self.current_image = np.zeros((640, 480, 3))
        self.model.load("model", directory="./models")

    def check_gpu_available(self, context: Context):
        """Log the CUDA status; fail if a GPU is required but not available.

        Raises:
            RuntimeError: when CUDA is unavailable and the environment
                variable ``AIDO_REQUIRE_GPU`` is set (to any value).
        """
        import torch

        has_cuda = torch.cuda.is_available()
        required = os.environ.get('AIDO_REQUIRE_GPU')
        context.info(f'torch.cuda.is_available = {has_cuda!r} AIDO_REQUIRE_GPU = {required!r}')
        context.info('init()')

        if has_cuda:
            index = torch.cuda.current_device()
            total = torch.cuda.device_count()
            label = torch.cuda.get_device_name(index)
            context.info(f'device {index} of {total}; name = {label!r}')
            return

        if required is None:
            # Running without a GPU is acceptable when it was not requested.
            return

        message = 'I need a GPU; bailing.'
        context.error(message)
        raise RuntimeError(message)

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        """Decode the camera JPEG, preprocess it and cache the result."""
        frame: JPGImage = data.camera
        rgb = jpg2rgb(frame.jpg_data)
        self.current_image = self.image_processor.preprocess(rgb)

    def compute_action(self, observation):
        """Return the model's prediction passed through the action processor."""
        predicted = self.model.predict(observation)
        as_float = predicted.astype(float)
        return self.action_processor.action(as_float)

    def on_received_get_commands(self, context: Context):
        """Write PWM and LED commands computed from the cached frame."""
        left_raw, right_raw = self.compute_action(self.current_image)
        # Keep both motor values inside [-1, 1] and hand over plain floats.
        left = float(np.clip(left_raw, -1, +1))
        right = float(np.clip(right_raw, -1, +1))

        # All five LED slots receive the same RGB(0.0, 0.0, 0.0) value.
        led_colour = RGB(0.0, 0.0, 0.0)
        leds = LEDSCommands(led_colour, led_colour, led_colour, led_colour, led_colour)
        motors = PWMCommands(motor_left=left, motor_right=right)
        context.write('commands', DB20Commands(motors, leds))

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info('finish()')
class PytorchRLTemplateAgent:
    """Template agent whose DDPG model is built (and optionally loaded) in ``__init__``.

    Camera frames are preprocessed and cached as they arrive; the cached frame
    is turned into wheel PWM commands whenever commands are requested.
    """

    def __init__(self, load_model=False, model_path=None):
        """Build the preprocessor and model; load a saved model when asked.

        Args:
            load_model: when truthy, load a saved model from the ``models``
                directory.
            model_path: name passed to ``model.load``; ``"model"`` is used
                when this is falsy.
        """
        logger.info('PytorchRLTemplateAgent init')
        self.preprocessor = DTPytorchWrapper()
        self.model = DDPG(
            state_dim=self.preprocessor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        # Placeholder frame used until the first observation is received.
        self.current_image = np.zeros((640, 480, 3))

        if load_model:
            logger.info('PytorchRLTemplateAgent loading models')
            checkpoint = model_path or "model"
            self.model.load(checkpoint, "models", for_inference=True)

        logger.info('PytorchRLTemplateAgent init complete')

    def init(self, context: Context):
        """Log that the protocol-level init was called."""
        context.info('init()')

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: Duckiebot1Observations):
        """Decode the camera JPEG, preprocess it and cache the result."""
        frame: JPGImage = data.camera
        rgb = jpg2rgb(frame.jpg_data)
        self.current_image = self.preprocessor.preprocess(rgb)

    def compute_action(self, observation):
        """Return the model's prediction for ``observation`` cast to float.

        No shape check or preprocessing is applied here; the observation is
        passed to the model as given.
        """
        predicted = self.model.predict(observation)
        return predicted.astype(float)

    def on_received_get_commands(self, context: Context):
        """Write PWM and LED commands computed from the cached frame."""
        left_raw, right_raw = self.compute_action(self.current_image)
        # Keep both motor values inside [-1, 1] and hand over plain floats.
        left = float(np.clip(left_raw, -1, +1))
        right = float(np.clip(right_raw, -1, +1))

        # All five LED slots receive the same RGB(0.0, 0.0, 0.0) value.
        led_colour = RGB(0.0, 0.0, 0.0)
        leds = LEDSCommands(led_colour, led_colour, led_colour, led_colour, led_colour)
        motors = PWMCommands(motor_left=left, motor_right=right)
        context.write('commands', Duckiebot1Commands(motors, leds))

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info('finish()')
def solve(params, cis):
    """Run the trained DDPG policy against the evaluation environment.

    Builds the Gym environment named by ``params['env']``, wraps it, loads the
    saved model and then steps the environment with the model's predictions
    until the Evaluation Engine reports ``'simulation_done'`` in ``info``.

    Args:
        params: mapping that must contain the key ``'env'`` (the Gym id).
        cis: a ``ChallengeInterfaceSolution`` used for logging.
    """
    # Python has dynamic typing; the line below can help IDEs with autocompletion.
    assert isinstance(cis, ChallengeInterfaceSolution)
    # After this, ``cis.`` will provide autocompletion in some IDEs (e.g. PyCharm).
    cis.info('Creating model.')
    # You have logging capabilities through the solution interface (cis).
    # The info you log can be retrieved from your submission files.

    # We get the environment from the Evaluation Engine.
    cis.info('Making environment')
    env = gym.make(params['env'])

    # === BEGIN SUBMISSION ===

    # If you created custom wrappers, you also need to copy them into this folder.
    from wrappers import NormalizeWrapper, ImgWrapper, ActionWrapper, ResizeWrapper

    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    # To make the images pytorch-conv-compatible.
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    # You ONLY need this wrapper if you trained your policy on
    # [speed, steering angle] instead of [left speed, right speed].
    # NOTE(review): SteeringToWheelVelWrapper is not among the names imported
    # above, so it has to be in scope at module level - confirm.
    env = SteeringToWheelVelWrapper(env)

    # You have to make sure that you're wrapping at least the actions
    # and observations in the same way as during training so that your model
    # receives the same kind of input, because that's what it's trained for
    # (for example if your model is trained on grayscale images and here
    # you _don't_ make it grayscale too, then your model won't work).

    # HERE YOU NEED TO CREATE THE POLICY NETWORK SAME AS YOU DID IN THE TRAINING CODE
    # If you aren't using the DDPG baseline code, then make sure to copy your model
    # into the model.py file and that it has a model.predict(state) method.
    from model import DDPG

    model = DDPG(
        state_dim=env.observation_space.shape,
        action_dim=2,
        max_action=1,
        net_type="cnn",
    )

    try:
        model.load("model", "models")

        # === END SUBMISSION ===

        # Then we make sure we have a connection with the environment and it is ready to go.
        cis.info('Reset environment')
        observation = env.reset()
        # While there is no signal of completion (simulation done) we run the
        # predictions for a number of episodes; the Evaluation Engine controls this part.
        while True:
            # We pass the observation to our model, and we get an action in return.
            action = model.predict(observation)
            # We tell the environment to perform this action and we get some
            # info back in OpenAI Gym style.
            observation, reward, done, info = env.step(action)
            # Here you may want to compute some stats, like how much reward you are getting.
            # Notice: this reward may not be associated with the challenge score.

            # It is important to check for this flag: the Evaluation Engine will let us know
            # when we should finish. If we are not careful with this, the Evaluation Engine
            # will kill our container and we will get no score from this submission.
            if 'simulation_done' in info:
                cis.info('simulation_done received.')
                break
            if done:
                cis.info('Episode done; calling reset()')
                # Keep the fresh observation; otherwise the next prediction
                # would be made on the last frame of the finished episode.
                observation = env.reset()
    finally:
        # Release CPU/GPU resources; let's be friendly with other users that may need them.
        cis.info('Releasing resources')
        try:
            model.close()
        except Exception:
            # Best-effort cleanup: report the failure, but do not swallow
            # KeyboardInterrupt / SystemExit.
            msg = 'Could not call model.close():\n%s' % traceback.format_exc()
            cis.error(msg)
    cis.info('Graceful exit of solve()')
class PytorchRLTemplateAgent:
    """Template agent that defers building its DDPG model to ``init``.

    The constructor only records the loading options. ``init`` checks for a
    GPU, builds the preprocessor and model, and optionally loads a saved model.
    """

    def __init__(self, load_model: bool, model_path: Optional[str]):
        """Remember whether and from where a saved model should be loaded."""
        self.load_model = load_model
        self.model_path = model_path

    def init(self, context: Context):
        """Check the GPU, build the preprocessor and model, optionally load it."""
        self.check_gpu_available(context)
        logger.info("PytorchRLTemplateAgent init")
        # The model module is imported lazily, after the GPU check.
        from model import DDPG

        self.preprocessor = DTPytorchWrapper()
        self.model = DDPG(
            state_dim=self.preprocessor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        # Placeholder frame used until the first observation is received.
        self.current_image = np.zeros((640, 480, 3))

        if self.load_model:
            logger.info("Pytorch Template Agent loading models")
            # Fall back to the name "model" when no path was supplied.
            checkpoint = self.model_path or "model"
            self.model.load(checkpoint, "models", for_inference=True)

        logger.info("PytorchRLTemplateAgent init complete")

    def check_gpu_available(self, context: Context):
        """Log the CUDA status and delegate to the no-GPU handler if needed."""
        import torch

        has_cuda = torch.cuda.is_available()
        context.info(f"torch.cuda.is_available = {has_cuda!r}")
        context.info("init()")

        if not has_cuda:
            no_hardware_GPU_available(context)
            return

        index = torch.cuda.current_device()
        total = torch.cuda.device_count()
        label = torch.cuda.get_device_name(index)
        context.info(f"device {index} of {total}; name = {label!r}")

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        """Decode the camera JPEG, preprocess it and cache the result."""
        frame: JPGImage = data.camera
        rgb = jpg2rgb(frame.jpg_data)
        self.current_image = self.preprocessor.preprocess(rgb)

    def compute_action(self, observation):
        """Return the model's prediction for ``observation`` cast to float.

        No shape check or preprocessing is applied here; the observation is
        passed to the model as given.
        """
        predicted = self.model.predict(observation)
        return predicted.astype(float)

    def on_received_get_commands(self, context: Context):
        """Write PWM and LED commands computed from the cached frame."""
        left_raw, right_raw = self.compute_action(self.current_image)
        # Keep both motor values inside [-1, 1] and hand over plain floats.
        left = float(np.clip(left_raw, -1, +1))
        right = float(np.clip(right_raw, -1, +1))

        # All five LED slots receive the same RGB(0.0, 0.0, 0.0) value.
        led_colour = RGB(0.0, 0.0, 0.0)
        leds = LEDSCommands(led_colour, led_colour, led_colour, led_colour, led_colour)
        motors = PWMCommands(motor_left=left, motor_right=right)
        context.write("commands", DB20Commands(motors, leds))

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info("finish()")
class PytorchRLTemplateAgent:
    """Template agent whose DDPG model is built (and optionally loaded) in ``init``.

    Camera frames are preprocessed and cached as they arrive; the cached frame
    is turned into wheel PWM commands whenever commands are requested.
    """

    def __init__(self):
        """Nothing to set up here; all state is created in ``init``."""
        pass

    def init(self, context: Context, load_model=False, model_path=None):
        """Check the GPU, build the preprocessor and model, optionally load it.

        Args:
            context: protocol context used for logging.
            load_model: when truthy, load a saved model from the ``models``
                directory.
            model_path: name passed to ``model.load``; ``"model"`` is used
                when this is falsy.

        Raises:
            RuntimeError: propagated from ``check_gpu_available``.
        """
        self.check_gpu_available(context)
        logger.info('PytorchRLTemplateAgent init')
        self.preprocessor = DTPytorchWrapper()
        self.model = DDPG(
            state_dim=self.preprocessor.shape,
            action_dim=2,
            max_action=1,
            net_type="cnn",
        )
        # Placeholder frame used until the first observation is received.
        self.current_image = np.zeros((640, 480, 3))
        if load_model:
            logger.info('PytorchRLTemplateAgent loading models')
            fp = model_path if model_path else "model"
            self.model.load(fp, "models", for_inference=True)
        logger.info('PytorchRLTemplateAgent init complete')

    def check_gpu_available(self, context: Context):
        """Log the CUDA status; fail if a GPU is required but not available.

        Raises:
            RuntimeError: when CUDA is unavailable and the environment
                variable ``AIDO_REQUIRE_GPU`` is set (to any value).
                ``RuntimeError`` is a subclass of ``Exception``, so callers
                that catch ``Exception`` keep working.
        """
        available = torch.cuda.is_available()
        req = os.environ.get('AIDO_REQUIRE_GPU', None)
        context.info(
            f'torch.cuda.is_available = {available!r} AIDO_REQUIRE_GPU = {req!r}'
        )
        context.info('init()')
        if available:
            i = torch.cuda.current_device()
            count = torch.cuda.device_count()
            name = torch.cuda.get_device_name(i)
            context.info(f'device {i} of {count}; name = {name!r}')
        elif req is not None:
            msg = 'I need a GPU; bailing.'
            context.error(msg)
            # A specific exception type instead of a bare Exception.
            raise RuntimeError(msg)

    def on_received_seed(self, data: int):
        """Seed NumPy's global random generator with ``data``."""
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        """Log the name of the episode that is starting."""
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        """Decode the camera JPEG, preprocess it and cache the result."""
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(obs)

    def compute_action(self, observation):
        """Return the model's prediction for ``observation`` cast to float.

        No shape check or preprocessing is applied here; the observation is
        passed to the model as given.
        """
        action = self.model.predict(observation)
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        """Write PWM and LED commands computed from the cached frame."""
        pwm_left, pwm_right = self.compute_action(self.current_image)
        # Keep both motor values inside [-1, 1] and hand over plain floats.
        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))

        # All five LED slots receive the same RGB(0.0, 0.0, 0.0) value.
        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = DB20Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        """Log that the agent is finishing."""
        context.info('finish()')