Code example #1
# Assumed imports: MQTTValueSub/MQTTValuePub and JpgToImgArr are donkeycar parts.
from donkeycar.parts.network import MQTTValuePub, MQTTValueSub
from donkeycar.parts.image import JpgToImgArr


class DonkeyRemoteContoller:
    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name,
                                       broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name,
                                           broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size
        self.img = None  # set by observe(); get_original_image() returns None before that

    def get_sensor_size(self):
        return self.sensor_size

    def wait_until_connected(self):
        pass

    def take_action(self, action):
        self.controller_pub.run(action)

    def quit(self):
        self.camera_sub.shutdown()
        self.controller_pub.shutdown()

    def get_original_image(self):
        return self.img

    def observe(self):
        jpg = self.camera_sub.run()
        self.img = self.jpgToImg.run(jpg)
        return self.img
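
The class above is a thin MQTT wrapper, so a driving loop only needs observe() and take_action(). A minimal usage sketch (the broker host and donkey name are placeholders; controls are published as a [steering, throttle] list, as in the later examples, and the car is assumed to already be publishing camera frames):

import time

controller = DonkeyRemoteContoller(donkey_name="kari", mqtt_broker="mqtt.eclipse.org")
controller.wait_until_connected()        # currently a no-op stub

try:
    for _ in range(100):
        img = controller.observe()       # latest camera frame as a numpy array
        steering, throttle = 0.0, 0.2    # placeholder policy output
        controller.take_action([steering, throttle])
        time.sleep(0.1)                  # roughly ten control steps per second
finally:
    controller.take_action([0, 0])       # stop the car
    controller.quit()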
Code example #2
# Imports as in code example #1.
class DonkeyRemoteContoller:
    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name,
                                       broker=mqtt_broker)
        # Earlier variant: def_value=(np.zeros((40, 40)), {"cte": 0})
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name,
                                           broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size

    def get_sensor_size(self):
        return self.sensor_size

    def wait_until_connected(self):
        pass

    def take_action(self, action):
        self.controller_pub.run(action)

    def quit(self):
        self.camera_sub.shutdown()
        self.controller_pub.shutdown()

    def get_original_image(self):
        return self.img

    def observe(self):
        jpg = self.camera_sub.run()
        # An earlier variant received a (jpg, info) tuple from the camera topic
        # instead of a bare JPEG.
        self.img = self.jpgToImg.run(jpg)
        return self.img
Code example #3
# Imports as in code example #1, plus numpy (np) and time.
class DonkeyCarSpeed:

    def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"):

        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker)
        self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker)

        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()

        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        

    def observe(self):

        self.img = self.jpgToImg.run(self.camera_sub.run())
        self.state = self.state_sub.run()
  
    def reset(self):
        self.controller_pub.run([0, 0])

        self.observe()
        time.sleep(1)

        return self.img
    
    def step(self, control, step_length):
        
        self.controller_pub.run(control)
        time.sleep(step_length)

        self.observe()

        v = self.state["v"]
        self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5

        done = self.is_dead()

        return self.img, done

    def is_dead(self):
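        """
        Counts dark pixels in a crop at the bottom of the image; if too few are
        found, the car is assumed to be off the track (the same heuristic as the
        is_dead methods in the later examples).
        """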

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (self.img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = self.img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required
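
With the (120, 160, 3) frames used elsewhere in these examples (160 pixels wide), is_dead above inspects the bottom 20 rows with 20 pixels cropped from each side, so pixels_required = (160 - 2 * 20) * 20 * 0.10 = 240: the car counts as off the track when fewer than 240 of the 2400 cropped pixels are dark in all three channels. A small illustrative check of the same arithmetic on a synthetic frame:

import numpy as np

# Synthetic 120x160 RGB frame: bright everywhere except a dark bottom-left patch.
frame = np.full((120, 160, 3), 200, dtype=np.uint8)
frame[-20:, :80] = 30

crop = frame[-20:, 20:-20]              # bottom 20 rows, 20 px trimmed per side
dark = (crop < 70).all(axis=2).sum()    # pixels dark in R, G and B
required = crop.shape[0] * crop.shape[1] * 0.10
print(dark, required, dark < required)  # 1200 240.0 False -> still on the track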
Code example #4
# Assumed to be defined at module level: args (parsed options), define_config,
# Dreamer, the MQTT parts, numpy (np), torch, time, and the constants
# TRAINING_TIMEOUT and BLOCK_SIZE.
class RL_Agent():
    def __init__(self, alg_type, sim, car_name=args.car_name):
        self.args = define_config()
        self.agent = Dreamer(self.args)
        self.sim = sim

        self.image = np.zeros((120, 160, 3))
        self.observation = torch.zeros(
            (1, 3, 64, 64))  # init observation, with batch dim
        self.belief = torch.zeros(1,
                                  self.args.belief_size,
                                  device=self.args.device)
        self.posterior_state = torch.zeros(1,
                                           self.args.state_size,
                                           device=self.args.device)
        self.action = torch.zeros(1,
                                  self.args.action_size,
                                  device=self.args.device)

        self.speed = 0

        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        self.training = False
        self.step_start = 0

        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer",
                                              broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer",
                                              broker="mqtt.eclipse.org",
                                              def_value=(0, True))

        self.replay_buffer_received_pub = MQTTValuePub(
            car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(
            car_name + "buffer_received",
            broker="mqtt.eclipse.org",
            def_value=0)

        self.param_pub = MQTTValuePub(car_name + "param",
                                      broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param",
                                      broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1

        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        self.belief = torch.zeros(1,
                                  self.args.belief_size,
                                  device=self.args.device)
        self.posterior_state = torch.zeros(1,
                                           self.args.state_size,
                                           device=self.args.device)
        self.action = torch.zeros(1,
                                  self.args.action_size,
                                  device=self.args.device)

        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            # Temporary fix for when the replay buffer sometimes fails to send.
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:

            buffers_received = self.replay_buffer_received_sub.run()

            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run(
                    (self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(
                    f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations"
                )
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]

            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()

        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)

        return False

    def run(self, image, speed=None):

        if not speed:
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(
            self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            # Save the observation to the replay buffer.
            reward = 1 + (self.speed - self.args.throttle_min) / (
                self.args.throttle_max - self.args.throttle_min)
            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_observation = self.agent.process_im(self.image)

            self.replay_buffer.append(
                (next_observation, self.action.cpu(), reward, done))

            self.episode_reward += reward
            step_end = time.time()

            # observation is a (3, 64, 64) tensor; image is a (120, 160, 3) numpy array
            self.observation = next_observation

            print(
                f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, "
                f"Episode reward: {self.episode_reward:.2f}, "
                f"Step time: {(step_end - self.step_start):.2f}, "
                f"Speed: {self.speed:.2f}, Steering: {self.steering:.2f}"
            )

        if self.step > self.args.max_episodes_steps or (self.dead and not self.training):
            self.training_start = time.time()

            self.step = 0
            self.steering = 0
            self.target_speed = 0

            self.training = True
            self.replay_buffer = self.replay_buffer[self.args.skip_initial_steps:]
            return self.steering, self.target_speed, self.training

        if self.training:
            self.training = self.train()
            self.dead = False

            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")

            self.reset(self.agent.process_im(self.image))

        self.step += 1

        if self.step < self.args.skip_initial_steps:
            return 0.001, 0, False

        self.step_start = time.time()

        if self.episode <= self.args.random_episodes:
            self.steering = np.random.normal(0, 1)
            self.target_speed = 0.5
            self.action = torch.tensor([[self.steering, self.target_speed]],
                                       device=self.args.device)
        else:
            with torch.no_grad():
                self.belief, self.posterior_state = self.agent.infer_state(
                    self.observation.to(self.args.device),
                    action=self.action,
                    belief=self.belief,
                    state=self.posterior_state)
                self.action = self.agent.select_action(
                    (self.belief, self.posterior_state))

                # Steering and target speed as numpy; action shape is [batch_size, act_size].
                action = self.action.cpu().numpy()
                self.steering, self.target_speed = action[0][0], action[0][1]

        return self.steering, self.target_speed, self.training


    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount to a threshold value.
        If there are not enough black pixels the car is assumed to be off the track.
        """

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        # print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required

    def is_dead_sim(self, img):
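        """
        In the simulator the car is assumed to be off the track when the red
        channel exceeds the blue channel in most of the bottom crop of the image.
        """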

        crop_height = 40
        required = 0.8

        cropped = img[-crop_height:]

        rgb = cropped[:, :, 0] > cropped[:, :, 2]

        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_steering):
        """
        Scale the agent actions to environment limits
        """

        var = (self.args.throttle_max - self.args.throttle_min) / 2
        mu = (self.args.throttle_max + self.args.throttle_min) / 2

        steering_min = max(self.args.steer_limit_left,
                           prev_steering - self.args.max_steering_diff)
        steering_max = min(self.args.steer_limit_right,
                           prev_steering + self.args.max_steering_diff)

        steering = max(steering_min, min(steering_max, action[0]))

        return np.array([steering, action[1] * var + mu], dtype=np.float32)
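
The enforce_limits helper maps the agent's raw throttle output from [-1, 1] onto [throttle_min, throttle_max] with the affine transform action[1] * var + mu (var is half the range, mu the midpoint) and clamps steering to within max_steering_diff of the previous command. A quick numeric check of the throttle mapping, using placeholder limits rather than the project's actual configuration:

throttle_min, throttle_max = 0.1, 0.5       # placeholder limits
var = (throttle_max - throttle_min) / 2     # 0.2
mu = (throttle_max + throttle_min) / 2      # 0.3

for a in (-1.0, 0.0, 1.0):
    print(a, "->", round(a * var + mu, 2))  # -1.0 -> 0.1, 0.0 -> 0.3, 1.0 -> 0.5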
Code example #5
# Imports as in code example #1, plus numpy (np) and time.
class DonkeyRemoteEnv:
    def __init__(self,
                 car_name="Kari",
                 mqtt_broker="mqtt.eclipse.org",
                 realsense=False,
                 env_type="DonkeyCar"):

        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name,
                                       broker=mqtt_broker)

        if realsense:
            self.state_sub = MQTTValueSub("donkey/%s/state" % car_name,
                                          broker=mqtt_broker)

        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name,
                                           broker=mqtt_broker)

        self.jpgToImg = JpgToImgArr()

        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        self.realsense = realsense
        self.sim = env_type != "DonkeyCar"

    def observe(self):

        self.img = self.jpgToImg.run(self.camera_sub.run())

        if self.realsense:
            self.state = self.state_sub.run()
            v = self.state["v"]
            self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5

    def reset(self):
        self.controller_pub.run([0, 0])

        self.observe()
        time.sleep(2)

        return self.img

    def step(self, control, step_length):
        print(control)
        self.controller_pub.run(control)
        time.sleep(step_length)

        self.observe()

        done = self.is_dead_sim() if self.sim else self.is_dead_real()

        return self.img, done

    def is_dead_real(self):

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (self.img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = self.img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required

    def is_dead_sim(self):

        crop_height = 40
        required = 0.8

        cropped = self.img[-crop_height:]

        rgb = cropped[:, :, 0] > cropped[:, :, 2]

        return rgb.sum() / (crop_height * 160) > required
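
DonkeyRemoteEnv exposes a gym-style reset/step pair, so an episode reduces to a simple loop. A sketch, reusing the placeholder car name and broker from the class defaults; the 0.1 s step length and constant controls are illustrative only, and the car is assumed to already be publishing camera frames:

env = DonkeyRemoteEnv(car_name="Kari", mqtt_broker="mqtt.eclipse.org", realsense=False)

img = env.reset()
done = False
while not done:
    steering, throttle = 0.0, 0.2                    # placeholder policy output
    img, done = env.step([steering, throttle], step_length=0.1)

env.controller_pub.run([0, 0])                       # stop the car after the episode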
Code example #6
# Assumed to be defined at module level: args, PARAMS, AE_SAC, the MQTT parts,
# numpy (np), time, and the constants COMMAND_HISTORY_LENGTH, FRAME_STACK,
# IMAGE_SIZE, IMAGE_CHANNELS, RGB, MAX_EPISODE_STEPS, SKIP_INITIAL_STEPS,
# THROTTLE_MIN, THROTTLE_MAX, MAX_STEERING_DIFF, MAX_THROTTLE_DIFF,
# STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT, TRAINING_TIMEOUT and BLOCK_SIZE.
class RL_Agent():
    def __init__(self,
                 alg_type,
                 sim,
                 car_name=args.car_name,
                 encoder_update=args.encoder_update,
                 max_episode_steps=MAX_EPISODE_STEPS):

        if encoder_update == "pixel":
            print("Using Pixel encoder")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = False

        if encoder_update == "ae":
            print("Using Autoencoder loss")
            PARAMS["sac"]["encoder_critic_loss"] = False
            PARAMS["sac"]["encoder_ae_loss"] = True

        if encoder_update == "aesac":
            print("Using autoencoder and critic loss")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = True

        self.agent = AE_SAC(PARAMS)
        self.sim = sim

        self.image = np.zeros((120, 160, 3))
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)

        self.state = np.vstack([
            self.agent.process_im(self.image, IMAGE_SIZE, RGB)
            for x in range(FRAME_STACK)
        ])
        self.speed = 0

        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.max_episode_steps = max_episode_steps

        self.target_speed = 0
        self.steering = 0

        self.training = False
        self.step_start = 0

        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer",
                                              broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer",
                                              broker="mqtt.eclipse.org",
                                              def_value=(0, True))

        self.replay_buffer_received_pub = MQTTValuePub(
            car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(
            car_name + "buffer_received",
            broker="mqtt.eclipse.org",
            def_value=0)

        self.param_pub = MQTTValuePub(car_name + "param",
                                      broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param",
                                      broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1

        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)
        self.state = np.vstack([image for x in range(FRAME_STACK)])
        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            # Temporary fix for when the replay buffer sometimes fails to send.
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:

            buffers_received = self.replay_buffer_received_sub.run()

            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run(
                    (self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(
                    f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations"
                )
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]

            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()

        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)

        return False

    def run(self, image, speed=None):

        if not speed:
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(
            self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            # Save the observation to the replay buffer.
            if THROTTLE_MAX == THROTTLE_MIN:
                reward = 1
            else:
                reward = 1 + (self.speed - THROTTLE_MIN) / (THROTTLE_MAX -
                                                            THROTTLE_MIN)
                reward = min(reward, 2) / 2

            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_command_history = np.roll(self.command_history, 3)
            next_command_history[:3] = [
                self.steering, self.target_speed, self.speed
            ]

            next_state = np.roll(self.state, IMAGE_CHANNELS)
            next_state[:IMAGE_CHANNELS, :, :] = self.agent.process_im(
                self.image, IMAGE_SIZE, RGB)

            self.replay_buffer.append([[self.state, self.command_history],
                                       [self.steering, self.target_speed],
                                       [reward],
                                       [next_state, next_command_history],
                                       [float(not done)]])

            self.episode_reward += reward
            step_end = time.time()

            self.state = next_state
            self.command_history = next_command_history

            print(
                f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, "
                f"Episode reward: {self.episode_reward:.2f}, "
                f"Step time: {(step_end - self.step_start):.2f}, "
                f"Speed: {self.speed:.2f}, Throttle: {self.target_speed:.2f}"
            )

        if self.step > self.max_episode_steps or (self.dead
                                                  and not self.training):
            self.training_start = time.time()

            self.step = 0
            self.steering = 0
            self.target_speed = 0
            self.training = True
            self.replay_buffer = self.replay_buffer[SKIP_INITIAL_STEPS:]
            return self.steering, self.target_speed, self.training

        if self.training:

            self.training = self.train()
            self.dead = False

            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")

            self.reset(self.agent.process_im(self.image, IMAGE_SIZE, RGB))

        self.step += 1

        if self.step < SKIP_INITIAL_STEPS:
            return 0.001, 0, False

        self.step_start = time.time()

        action = self.agent.select_action((self.state, self.command_history))

        self.steering, self.target_speed = self.enforce_limits(
            action, self.command_history[:2])

        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount to a threshold value.
        If there are not enough black pixels the car is assumed to be off the track.
        """

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        #print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required

    def is_dead_sim(self, img):

        crop_height = 40
        required = 0.8

        cropped = img[-crop_height:]

        rgb = cropped[:, :, 0] > cropped[:, :, 2]

        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_action):
        """
        Scale the agent actions to environment limits
        """

        prev_steering = prev_action[0]
        prev_throttle = prev_action[1]

        var = (THROTTLE_MAX - THROTTLE_MIN) / 2
        mu = (THROTTLE_MAX + THROTTLE_MIN) / 2

        raw_throttle = action[1] * var + mu

        steering = np.clip(action[0], prev_steering - MAX_STEERING_DIFF,
                           prev_steering + MAX_STEERING_DIFF)
        steering = np.clip(steering, STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT)

        throttle = np.clip(raw_throttle, prev_throttle - MAX_THROTTLE_DIFF,
                           prev_throttle + MAX_THROTTLE_DIFF)

        return [steering, throttle]
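
run() returns (steering, target_speed, training) on every call, so the surrounding drive loop only has to pass in the latest image (and speed, when available) and forward the commands to the car; while training is True, run() keeps returning zero commands and only services the replay-buffer and parameter exchange with the remote trainer. A sketch of that wiring, reusing DonkeyRemoteEnv from code example #5, with placeholder arguments and assuming the module-level configuration listed above is in place:

env = DonkeyRemoteEnv(car_name="Kari", mqtt_broker="mqtt.eclipse.org", realsense=True)
agent = RL_Agent(alg_type="sac", sim=False)    # alg_type value is a placeholder

img = env.reset()
while True:
    steering, throttle, training = agent.run(img, speed=env.speed)
    # While training is True the agent returns zero commands and only exchanges
    # data with the remote trainer, so the car simply idles between episodes.
    img, _ = env.step([steering, throttle], step_length=0.1)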