class PytorchRLBaseline:

    def init(self, context: Context):
        context.info('init()')

        self.image_processor = DTPytorchWrapper()
        self.action_processor = ActionWrapper(FakeWrap())
        from model import DDPG

        self.check_gpu_available(context)

        self.model = DDPG(state_dim=self.image_processor.shape, action_dim=2, max_action=1, net_type="cnn")
        self.current_image = np.zeros((640, 480, 3))
        self.model.load("model", directory="./models")

    def check_gpu_available(self, context: Context):
        import torch
        available = torch.cuda.is_available()
        req = os.environ.get('AIDO_REQUIRE_GPU', None)
        context.info(f'torch.cuda.is_available = {available!r} AIDO_REQUIRE_GPU = {req!r}')
        context.info('init()')
        if available:
            i = torch.cuda.current_device()
            count = torch.cuda.device_count()
            name = torch.cuda.get_device_name(i)
            context.info(f'device {i} of {count}; name = {name!r}')
        else:
            if req is not None:
                msg = 'I need a GPU; bailing.'
                context.error(msg)
                raise RuntimeError(msg)

    def on_received_seed(self, data: int):
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        self.current_image = self.image_processor.preprocess(obs)

    def compute_action(self, observation):
        action = self.model.predict(observation)

        return self.action_processor.action(action.astype(float))

    def on_received_get_commands(self, context: Context):
        pwm_left, pwm_right = self.compute_action(self.current_image)
        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))
        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = DB20Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        context.info('finish()')
Example #2
0
class PytorchRLTemplateAgent:
    def __init__(self, load_model=False, model_path=None):
        logger.info('PytorchRLTemplateAgent init')
        self.preprocessor = DTPytorchWrapper()

        self.model = DDPG(state_dim=self.preprocessor.shape,
                          action_dim=2,
                          max_action=1,
                          net_type="cnn")
        self.current_image = np.zeros((640, 480, 3))

        if load_model:
            logger.info('PytorchRLTemplateAgent loading models')
            fp = model_path if model_path else "model"
            self.model.load(fp, "models", for_inference=True)
        logger.info('PytorchRLTemplateAgent init complete')

    def init(self, context: Context):
        context.info('init()')

    def on_received_seed(self, data: int):
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: Duckiebot1Observations):
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(obs)

    def compute_action(self, observation):
        #if observation.shape != self.preprocessor.transposed_shape:
        #    observation = self.preprocessor.preprocess(observation)
        action = self.model.predict(observation)
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        pwm_left, pwm_right = self.compute_action(self.current_image)

        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))

        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = Duckiebot1Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        context.info('finish()')
Example #3
0
def solve(params, cis):
    # python has dynamic typing, the line below can help IDEs with autocompletion
    assert isinstance(cis, ChallengeInterfaceSolution)
    # after this cis. will provide you with some autocompletion in some IDEs (e.g.: pycharm)
    cis.info('Creating model.')
    # you can have logging capabilties through the solution interface (cis).
    # the info you log can be retrieved from your submission files.

    # We get environment from the Evaluation Engine
    cis.info('Making environment')
    env = gym.make(params['env'])

    # === BEGIN SUBMISSION ===

    # If you created custom wrappers, you also need to copy them into this folder.

    from wrappers import NormalizeWrapper, ImgWrapper, ActionWrapper, ResizeWrapper

    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    # to make the images pytorch-conv-compatible
    env = ImgWrapper(env)
    env = ActionWrapper(env)

    # you ONLY need this wrapper if you trained your policy on [speed,steering angle]
    # instead [left speed, right speed]
    env = SteeringToWheelVelWrapper(env)

    # you have to make sure that you're wrapping at least the actions
    # and observations in the same as during training so that your model
    # receives the same kind of input, because that's what it's trained for
    # (for example if your model is trained on grayscale images and here
    # you _don't_ make it grayscale too, then your model wont work)

    # HERE YOU NEED TO CREATE THE POLICY NETWORK SAME AS YOU DID IN THE TRAINING CODE
    # if you aren't using the DDPG baseline code, then make sure to copy your model
    # into the model.py file and that it has a model.predict(state) method.
    from model import DDPG

    model = DDPG(state_dim=env.observation_space.shape,
                 action_dim=2,
                 max_action=1,
                 net_type="cnn")

    try:
        model.load("model", "models")

        # === END SUBMISSION ===

        # Then we make sure we have a connection with the environment and it is ready to go
        cis.info('Reset environment')
        observation = env.reset()

        # While there are no signal of completion (simulation done)
        # we run the predictions for a number of episodes, don't worry, we have the control on this part
        while True:
            # we passe the observation to our model, and we get an action in return
            action = model.predict(observation)
            # we tell the environment to perform this action and we get some info back in OpenAI Gym style
            observation, reward, done, info = env.step(action)
            # here you may want to compute some stats, like how much reward are you getting
            # notice, this reward may no be associated with the challenge score.

            # it is important to check for this flag, the Evalution Engine will let us know when should we finish
            # if we are not careful with this the Evaluation Engine will kill our container and we will get no score
            # from this submission
            if 'simulation_done' in info:
                cis.info('simulation_done received.')
                break
            if done:
                cis.info('Episode done; calling reset()')
                env.reset()

    finally:
        # release CPU/GPU resources, let's be friendly with other users that may need them
        cis.info('Releasing resources')
        try:
            model.close()
        except:
            msg = 'Could not call model.close():\n%s' % traceback.format_exc()
            cis.error(msg)
    cis.info('Graceful exit of solve()')
    env = gym.make('Pendulum-v0').unwrapped
    n_state = env.observation_space.shape[0]  # 提取state的維度
    n_action = env.action_space.shape[0]  # 提取action的維度
    a_limit = env.action_space.high[0]  # 提取action連續動作中,最大的可能數值

    # Create network
    net = DDPG(n_state=n_state,
               n_action=n_action,
               a_limit=a_limit,
               model_folder=model_folder,
               memory_size=memory_size,
               batch_size=batch_size,
               tau=tau,
               gamma=gamma,
               var=var)
    net.load()

    # Train
    reward_list = []
    for i in range(episode):
        s = env.reset()
        total_reward = 0
        for j in range(max_iter):
            # env.render()
            a = net.chooseAction(s)
            s_, r, finish, info = env.step(a)

            # 將資料存到記憶庫並更新參數
            net.store_path(s, a, r / 10, s_)
            net.update()
Example #5
0
def master_loop(env):

    logger = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    fileHandler = logging.FileHandler('./log/test.log')
    fileHandler.setFormatter(formatter)

    logger.addHandler(fileHandler)
    logger.setLevel(logging.INFO)

    s_dim = env.get_s_dim()
    a_dim = env.get_a_dim()
    a_high = env.get_a_high()
    a_low = env.get_a_low()
    # print(a_bound)
    print("s_dim: {}, a_dim{}, a_high:{}, a_low:{}".format(
        s_dim, a_dim, a_high, a_low))
    ddpg = DDPG(a_dim,
                s_dim,
                a_high,
                a_low,
                lr_a=LR_A,
                lr_c=LR_C,
                gamma=GAMMA,
                tau=TAU,
                rpm_size=MEMORY_CAPACITY,
                batch_size=BATCH_SIZE)

    status = MPI.Status()
    start_time = time.time()
    reset_time = time.time()

    total_eps = 0
    total_step = 0

    n_step = 0
    n_eps = 0

    max_reward = -9999
    max_reward_rank = 0

    ddpg.load()

    while total_eps < MAX_EPISODES:
        data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        source = status.Get_source()
        tag = status.Get_tag()

        if tag == REQ_ACTION:
            # action = env.action_space.sample()
            action = ddpg.choose_action(data)
            comm.send((action, total_eps, total_step),
                      dest=source,
                      tag=RSP_ACTION)

        elif tag == OBS_DATA:
            n_step += 1
            total_step += 1
            (s, a, r, s_, done, ep_reward, ep_step) = data
            is_done = 0.0
            if done:
                is_done = 1.0

            ddpg.store_transition(s, a, r, s_, is_done)

            if ddpg.pointer > LEARN_START and total_step % 3 == 0:
                ddpg.learn()

            if done:
                total_eps += 1
                if ep_reward > max_reward:
                    max_reward = ep_reward
                    max_reward_rank = source

                s = "eps: {:>8}, worker: {:>3}, ep_reward:{:7.4f}, max:{:7.4f}/{:>3}, step:{:4}".format(
                    total_eps, source, ep_reward, max_reward, max_reward_rank,
                    ep_step)
                #print(s)
                logging.info(s)

                if total_eps % 500 == 0:
                    ddpg.save(total_eps)
                    interval = time.time() - reset_time
                    s = "# total_step: {:>8} ,total_eps: {:>6} eps/min: {:>6}, frame/sec: {:>6}".format(
                        total_step, total_eps, n_eps / interval * 60,
                        n_step / interval)
                    #print(s)
                    logging.info(s)

                    n_step = 0
                    n_eps = 0
                    reset_time = time.time()
class PytorchRLTemplateAgent:
    def __init__(self, load_model: bool, model_path: Optional[str]):
        self.load_model = load_model
        self.model_path = model_path

    def init(self, context: Context):
        self.check_gpu_available(context)
        logger.info("PytorchRLTemplateAgent init")
        from model import DDPG

        self.preprocessor = DTPytorchWrapper()

        self.model = DDPG(state_dim=self.preprocessor.shape,
                          action_dim=2,
                          max_action=1,
                          net_type="cnn")
        self.current_image = np.zeros((640, 480, 3))

        if self.load_model:
            logger.info("Pytorch Template Agent loading models")
            fp = self.model_path if self.model_path else "model"
            self.model.load(fp, "models", for_inference=True)
        logger.info("PytorchRLTemplateAgent init complete")

    def check_gpu_available(self, context: Context):
        import torch

        available = torch.cuda.is_available()
        context.info(f"torch.cuda.is_available = {available!r}")
        context.info("init()")
        if available:
            i = torch.cuda.current_device()
            count = torch.cuda.device_count()
            name = torch.cuda.get_device_name(i)
            context.info(f"device {i} of {count}; name = {name!r}")
        else:
            no_hardware_GPU_available(context)

    def on_received_seed(self, data: int):
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(obs)

    def compute_action(self, observation):
        # if observation.shape != self.preprocessor.transposed_shape:
        #    observation = self.preprocessor.preprocess(observation)
        action = self.model.predict(observation)
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        pwm_left, pwm_right = self.compute_action(self.current_image)

        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))

        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = DB20Commands(pwm_commands, led_commands)
        context.write("commands", commands)

    def finish(self, context: Context):
        context.info("finish()")
class PytorchRLTemplateAgent:
    def __init__(self):
        pass

    def init(self, context: Context, load_model=False, model_path=None):
        self.check_gpu_available(context)
        logger.info('PytorchRLTemplateAgent init')
        self.preprocessor = DTPytorchWrapper()

        self.model = DDPG(state_dim=self.preprocessor.shape,
                          action_dim=2,
                          max_action=1,
                          net_type="cnn")
        self.current_image = np.zeros((640, 480, 3))

        if load_model:
            logger.info('PytorchRLTemplateAgent loading models')
            fp = model_path if model_path else "model"
            self.model.load(fp, "models", for_inference=True)
        logger.info('PytorchRLTemplateAgent init complete')

    def check_gpu_available(self, context: Context):
        available = torch.cuda.is_available()
        req = os.environ.get('AIDO_REQUIRE_GPU', None)
        context.info(
            f'torch.cuda.is_available = {available!r} AIDO_REQUIRE_GPU = {req!r}'
        )
        context.info('init()')
        if available:
            i = torch.cuda.current_device()
            count = torch.cuda.device_count()
            name = torch.cuda.get_device_name(i)
            context.info(f'device {i} of {count}; name = {name!r}')
        else:
            if req is not None:
                msg = 'I need a GPU; bailing.'
                context.error(msg)
                raise Exception(msg)

    def on_received_seed(self, data: int):
        np.random.seed(data)

    def on_received_episode_start(self, context: Context, data: EpisodeStart):
        context.info(f'Starting episode "{data.episode_name}".')

    def on_received_observations(self, data: DB20Observations):
        camera: JPGImage = data.camera
        obs = jpg2rgb(camera.jpg_data)
        self.current_image = self.preprocessor.preprocess(obs)

    def compute_action(self, observation):
        #if observation.shape != self.preprocessor.transposed_shape:
        #    observation = self.preprocessor.preprocess(observation)
        action = self.model.predict(observation)
        return action.astype(float)

    def on_received_get_commands(self, context: Context):
        pwm_left, pwm_right = self.compute_action(self.current_image)

        pwm_left = float(np.clip(pwm_left, -1, +1))
        pwm_right = float(np.clip(pwm_right, -1, +1))

        grey = RGB(0.0, 0.0, 0.0)
        led_commands = LEDSCommands(grey, grey, grey, grey, grey)
        pwm_commands = PWMCommands(motor_left=pwm_left, motor_right=pwm_right)
        commands = DB20Commands(pwm_commands, led_commands)
        context.write('commands', commands)

    def finish(self, context: Context):
        context.info('finish()')