class DonkeyRemoteContoller:
    """Remote control client: subscribes to the car's camera frames over MQTT
    and publishes [steering, throttle] controls back to it."""

    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name, broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size

    def get_sensor_size(self):
        return self.sensor_size

    def wait_until_connected(self):
        pass

    def take_action(self, action):
        self.controller_pub.run(action)

    def quit(self):
        self.camera_sub.shutdown()
        self.controller_pub.shutdown()

    def get_original_image(self):
        return self.img

    def observe(self):
        jpg = self.camera_sub.run()
        self.img = self.jpgToImg.run(jpg)
        return self.img
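# Usage sketch (not part of the original file): drive a short open-loop burst
# through DonkeyRemoteContoller. The car name, broker address, loop rate and
# the fixed [steering, throttle] action are illustrative assumptions.
if __name__ == "__main__":
    import time

    controller = DonkeyRemoteContoller("kari", "mqtt.eclipse.org")
    controller.wait_until_connected()
    try:
        for _ in range(100):
            img = controller.observe()          # latest camera frame as a numpy array
            print("frame shape:", None if img is None else img.shape)
            controller.take_action([0.0, 0.2])  # [steering, throttle]
            time.sleep(0.05)
    finally:
        controller.quit()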
def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"): self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker) self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker) self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker) self.jpgToImg = JpgToImgArr() self.img = np.zeros((40, 40)) self.state = None self.speed = 0
def __init__(self, car_name="Kari", mqtt_broker="mqtt.eclipse.org", realsense=False, env_type="DonkeyCar"): self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name, broker=mqtt_broker) if realsense: self.state_sub = MQTTValueSub("donkey/%s/state" % car_name, broker=mqtt_broker) self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name, broker=mqtt_broker) self.jpgToImg = JpgToImgArr() self.img = np.zeros((40, 40)) self.state = None self.speed = 0 self.realsense = realsense self.sim = env_type != "DonkeyCar"
import os

import cv2
import donkeycar as dk
from docopt import docopt

from donkeycar.parts.network import MQTTValuePub, MQTTValueSub
from donkeycar.parts.transform import Lambda
from donkeycar.parts.image import JpgToImgArr
# ImgBGR2RGB, ImageScale, CvImageView and ArrowKeyboardControls are assumed to
# come from donkeycar.parts.cv; their import is not shown in the original snippet.

V = dk.vehicle.Vehicle()
args = docopt(__doc__)
print(args)

V.add(MQTTValueSub(name="donkey/%s/camera" % args["--name"], broker=args["--broker"]),
      outputs=["jpg"])
V.add(JpgToImgArr(), inputs=["jpg"], outputs=["img_arr"])
V.add(ImgBGR2RGB(), inputs=["img_arr"], outputs=["rgb"])
V.add(ImageScale(4.0), inputs=["rgb"], outputs=["lg_img"])
V.add(CvImageView(), inputs=["lg_img"])
V.add(ArrowKeyboardControls(), outputs=["control"])
V.add(MQTTValuePub(name="donkey/%s/controls" % args["--name"]), inputs=["control"])

record_path = args["--record"]
if record_path is not None:
    class ImageSaver:
        def __init__(self, path):
            self.index = 0
            self.path = path

        def run(self, img_arr):
            if img_arr is None:
                return
            dest_path = os.path.join(self.path, "img_%d.jpg" % self.index)
            self.index += 1
            cv2.imwrite(dest_path, img_arr)
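    # Assumed continuation (the original snippet is truncated here): wire the
    # ImageSaver into the loop so each decoded frame is written to disk.
    V.add(ImageSaver(record_path), inputs=["img_arr"])

# Start the vehicle loop; 20 Hz is an illustrative rate, not a value taken
# from the original script.
V.start(rate_hz=20)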
if cfg.DONKEY_GYM:
    from donkeycar.parts.dgym import DonkeyGymEnv

    cam = DonkeyGymEnv(cfg.DONKEY_SIM_PATH, env_name=cfg.DONKEY_GYM_ENV_NAME)
    V.add(cam, inputs=['angle', 'throttle'], outputs=['camera/arr'], threaded=True)

img_to_jpg = ImgArrToJpg()
V.add(img_to_jpg, inputs=["camera/arr"], outputs=["camera/jpg"])

pub_cam = MQTTValuePub("donkey/%s/camera" % cfg.DONKEY_UNIQUE_NAME, broker=cfg.MQTT_BROKER)
V.add(pub_cam, inputs=["camera/jpg"])

sub_controls = MQTTValueSub("donkey/%s/controls" % cfg.DONKEY_UNIQUE_NAME,
                            def_value=(0., 0.), broker=cfg.MQTT_BROKER)
V.add(sub_controls, outputs=["angle", "throttle"])

# The PWM steering/throttle hardware setup (PCA9685 + PWMSteering/PWMThrottle)
# is disabled here: in simulator mode the gym environment consumes the
# controls directly.
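# Assumed continuation (not shown in the original snippet): with no hardware
# parts to add in simulator mode, the loop only needs to be started. The
# 20 Hz rate is illustrative.
V.start(rate_hz=20)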
class DonkeyCarSpeed:
    def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker)
        self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0

    def observe(self):
        self.img = self.jpgToImg.run(self.camera_sub.run())
        self.state = self.state_sub.run()

    def reset(self):
        self.controller_pub.run([0, 0])
        self.observe()
        time.sleep(1)
        return self.img

    def step(self, control, step_length):
        self.controller_pub.run(control)
        time.sleep(step_length)
        self.observe()
        v = self.state["v"]
        self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5  # speed = norm of the velocity vector
        done = self.is_dead()
        return self.img, done

    def is_dead(self):
        """
        Counts the black pixels from the ground and compares the amount to a
        threshold value. If there are not enough black pixels the car is
        assumed to be off the track.
        """
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (self.img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage

        crop = self.img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()

        print("Pixels: {}, Required: {}".format(pixels, pixels_required))
        return pixels < pixels_required
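# Quick self-check sketch (not in the original file) for the is_dead()
# heuristic: an all-black frame (plenty of dark "track" pixels) should read
# as alive, an all-white frame as dead. The 120x160x3 shape follows the
# camera frames used elsewhere in this repo.
if __name__ == "__main__":
    env = DonkeyCarSpeed.__new__(DonkeyCarSpeed)  # bypass __init__ to skip MQTT setup
    env.img = np.zeros((120, 160, 3), dtype=np.uint8)      # dark ground everywhere
    assert not env.is_dead()
    env.img = np.full((120, 160, 3), 255, dtype=np.uint8)  # no dark pixels at all
    assert env.is_dead()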
class RL_Agent():
    def __init__(self, alg_type, sim, car_name=args.car_name):
        self.args = define_config()
        self.agent = Dreamer(self.args)
        self.sim = sim
        self.image = np.zeros((120, 160, 3))
        self.observation = torch.zeros((1, 3, 64, 64))  # init observation, with batch dim
        self.belief = torch.zeros(1, self.args.belief_size, device=self.args.device)
        self.posterior_state = torch.zeros(1, self.args.state_size, device=self.args.device)
        self.action = torch.zeros(1, self.args.action_size, device=self.args.device)
        self.speed = 0
        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.target_speed = 0
        self.steering = 0
        self.training = False
        self.step_start = 0
        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer", broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer", broker="mqtt.eclipse.org",
                                              def_value=(0, True))
        self.replay_buffer_received_pub = MQTTValuePub(car_name + "buffer_received",
                                                       broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(car_name + "buffer_received",
                                                       broker="mqtt.eclipse.org", def_value=0)
        self.param_pub = MQTTValuePub(car_name + "param", broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param", broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1
        self.episode_reward = 0
        self.replay_buffer = []
        self.target_speed = 0
        self.steering = 0
        self.belief = torch.zeros(1, self.args.belief_size, device=self.args.device)
        self.posterior_state = torch.zeros(1, self.args.state_size, device=self.args.device)
        self.action = torch.zeros(1, self.args.action_size, device=self.args.device)
        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            # Temporary fix for when the replay buffer sometimes fails to send.
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:
            buffers_received = self.replay_buffer_received_sub.run()
            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run((self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations")
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]
            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()
        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)
        return False

    def run(self, image, speed=None):
        if not speed:
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            # Save the observation to the replay buffer.
            reward = 1 + (self.speed - self.args.throttle_min) / (
                self.args.throttle_max - self.args.throttle_min)
            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_observation = self.agent.process_im(self.image)
            self.replay_buffer.append((next_observation, self.action.cpu(), reward, done))
            self.episode_reward += reward
            step_end = time.time()
            # observation is a tensor (3, 64, 64); image is a numpy array (120, 160, 3)
            self.observation = next_observation
            print(f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, "
                  f"Episode reward: {self.episode_reward:.2f}, "
                  f"Step time: {(self.step_start - step_end):.2f}, "
                  f"Speed: {self.speed:.2f}, Steering: {self.steering:.2f}")

        if self.step > self.args.max_episodes_steps or (self.dead and not self.training):
            self.training_start = time.time()
            self.step = 0
            self.steering = 0
            self.target_speed = 0
            self.training = True
            self.replay_buffer = self.replay_buffer[self.args.skip_initial_steps:]
            return self.steering, self.target_speed, self.training

        if self.training:
            self.training = self.train()
            self.dead = False
            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")
            self.reset(self.agent.process_im(self.image))

        self.step += 1

        if self.step < self.args.skip_initial_steps:
            return 0.001, 0, False

        self.step_start = time.time()

        if self.episode <= self.args.random_episodes:
            self.steering = np.random.normal(0, 1)
            self.target_speed = 0.5
            self.action = torch.tensor([[self.steering, self.target_speed]],
                                       device=self.args.device)
        else:
            with torch.no_grad():
                self.belief, self.posterior_state = self.agent.infer_state(
                    self.observation.to(self.args.device),
                    action=self.action,
                    belief=self.belief,
                    state=self.posterior_state)
                self.action = self.agent.select_action((self.belief, self.posterior_state))

            action = self.action.cpu().numpy()  # action dim: [batch_size, act_size]
            self.steering, self.target_speed = action[0][0], action[0][1]

        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount to a
        threshold value. If there are not enough black pixels the car is
        assumed to be off the track.
        """
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage

        crop = img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()

        return pixels < pixels_required

    def is_dead_sim(self, img):
        crop_height = 40
        required = 0.8

        cropped = img[-crop_height:]
        rgb = cropped[:, :, 0] > cropped[:, :, 2]
        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_steering):
        """
        Scale the agent actions to environment limits.
        """
        var = (self.args.throttle_max - self.args.throttle_min) / 2
        mu = (self.args.throttle_max + self.args.throttle_min) / 2

        steering_min = max(self.args.steer_limit_left,
                           prev_steering - self.args.max_steering_diff)
        steering_max = min(self.args.steer_limit_right,
                           prev_steering + self.args.max_steering_diff)
        steering = max(steering_min, min(steering_max, action[0]))

        return np.array([steering, action[1] * var + mu], dtype=np.float32)
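# Sketch of the trainer-side counterpart to RL_Agent.train() above. This is
# an assumption for illustration: the actual trainer script is not shown in
# this section, and update_parameters()/export_parameters() are hypothetical
# names mirroring the import_parameters() call the car makes.
def trainer_loop(agent, car_name, broker="mqtt.eclipse.org"):
    buffer_sub = MQTTValueSub(car_name + "buffer", broker=broker, def_value=(0, True))
    received_pub = MQTTValuePub(car_name + "buffer_received", broker=broker)
    param_pub = MQTTValuePub(car_name + "param", broker=broker)

    acknowledged = 0
    while True:
        count, block = buffer_sub.run()
        if isinstance(block, list) and count > acknowledged:
            # A new block of transitions arrived; store it and echo the
            # counter so the car sends the next block.
            agent.replay_buffer.extend(block)
            acknowledged = count
            received_pub.run(acknowledged)
        elif count == 0 and block is False and acknowledged > 0:
            # (0, False) marks the end of the episode's buffer: train, then
            # publish fresh parameters for the car to import.
            agent.update_parameters()
            param_pub.run(agent.export_parameters())
            acknowledged = 0
        time.sleep(0.1)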
class DonkeyRemoteEnv:
    def __init__(self, car_name="Kari", mqtt_broker="mqtt.eclipse.org",
                 realsense=False, env_type="DonkeyCar"):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name, broker=mqtt_broker)
        if realsense:
            self.state_sub = MQTTValueSub("donkey/%s/state" % car_name, broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        self.realsense = realsense
        self.sim = env_type != "DonkeyCar"

    def observe(self):
        self.img = self.jpgToImg.run(self.camera_sub.run())
        if self.realsense:
            self.state = self.state_sub.run()
            v = self.state["v"]
            self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5

    def reset(self):
        self.controller_pub.run([0, 0])
        self.observe()
        time.sleep(2)
        return self.img

    def step(self, control, step_length):
        print(control)
        self.controller_pub.run(control)
        time.sleep(step_length)
        self.observe()
        done = self.is_dead_sim() if self.sim else self.is_dead_real()
        return self.img, done

    def is_dead_real(self):
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (self.img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage

        crop = self.img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()

        print("Pixels: {}, Required: {}".format(pixels, pixels_required))
        return pixels < pixels_required

    def is_dead_sim(self):
        crop_height = 40
        required = 0.8

        cropped = self.img[-crop_height:]
        rgb = cropped[:, :, 0] > cropped[:, :, 2]
        return rgb.sum() / (crop_height * 160) > required
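# Usage sketch (not in the original file): a random-policy rollout against
# DonkeyRemoteEnv. The 0.1 s step length, the action ranges, and the
# "DonkeySim" env_type value (anything other than "DonkeyCar" selects the
# simulator heuristics) are illustrative assumptions.
if __name__ == "__main__":
    env = DonkeyRemoteEnv(car_name="Kari", env_type="DonkeySim")
    img = env.reset()
    done = False
    while not done:
        steering = np.random.uniform(-1, 1)
        throttle = np.random.uniform(0, 0.3)
        img, done = env.step([steering, throttle], step_length=0.1)
    env.controller_pub.run([0, 0])  # stop the car at the end of the episode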
cfg = dk.load_config()
V = dk.Vehicle()
print("starting up", cfg.DONKEY_UNIQUE_NAME, "for remote management.")

cam = PiCamera(image_w=cfg.IMAGE_W, image_h=cfg.IMAGE_H, image_d=cfg.IMAGE_DEPTH)
V.add(cam, outputs=["camera/arr"], threaded=True)

img_to_jpg = ImgArrToJpg()
V.add(img_to_jpg, inputs=["camera/arr"], outputs=["camera/jpg"])

pub_cam = MQTTValuePub("donkey/%s/camera" % cfg.DONKEY_UNIQUE_NAME)
V.add(pub_cam, inputs=["camera/jpg"])

sub_controls = MQTTValueSub("donkey/%s/controls" % cfg.DONKEY_UNIQUE_NAME, def_value=(0., 0.))
V.add(sub_controls, outputs=["angle", "throttle"])

steering_controller = ServoBlaster(cfg.STEERING_CHANNEL)

# PWM pulse values should be in the range of 100 to 200
assert cfg.STEERING_LEFT_PWM <= 200
assert cfg.STEERING_RIGHT_PWM <= 200
steering = PWMSteering(controller=steering_controller,
                       left_pulse=cfg.STEERING_LEFT_PWM,
                       right_pulse=cfg.STEERING_RIGHT_PWM)
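# Assumed continuation (the original snippet is truncated here): a throttle
# actuator mirroring the steering setup, the V.add calls wiring both to the
# MQTT control channels, and the loop start. The choice of ServoBlaster for
# throttle and the cfg constant names are assumptions (a real build may use
# a different controller, e.g. PCA9685).
throttle_controller = ServoBlaster(cfg.THROTTLE_CHANNEL)
throttle = PWMThrottle(controller=throttle_controller,
                       max_pulse=cfg.THROTTLE_FORWARD_PWM,
                       zero_pulse=cfg.THROTTLE_STOPPED_PWM,
                       min_pulse=cfg.THROTTLE_REVERSE_PWM)

V.add(steering, inputs=["angle"])
V.add(throttle, inputs=["throttle"])

V.start(rate_hz=cfg.DRIVE_LOOP_HZ)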
class RL_Agent():
    def __init__(self, alg_type, sim, car_name=args.car_name,
                 encoder_update=args.encoder_update,
                 max_episode_steps=MAX_EPISODE_STEPS):
        if encoder_update == "pixel":
            print("Using Pixel encoder")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = False
        if encoder_update == "ae":
            print("Using Autoencoder loss")
            PARAMS["sac"]["encoder_critic_loss"] = False
            PARAMS["sac"]["encoder_ae_loss"] = True
        if encoder_update == "aesac":
            print("Using autoencoder and critic loss")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = True

        self.agent = AE_SAC(PARAMS)
        self.sim = sim
        self.image = np.zeros((120, 160, 3))
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)
        self.state = np.vstack([
            self.agent.process_im(self.image, IMAGE_SIZE, RGB)
            for x in range(FRAME_STACK)
        ])
        self.speed = 0
        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.max_episode_steps = max_episode_steps
        self.target_speed = 0
        self.steering = 0
        self.training = False
        self.step_start = 0
        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer", broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer", broker="mqtt.eclipse.org",
                                              def_value=(0, True))
        self.replay_buffer_received_pub = MQTTValuePub(car_name + "buffer_received",
                                                       broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(car_name + "buffer_received",
                                                       broker="mqtt.eclipse.org", def_value=0)
        self.param_pub = MQTTValuePub(car_name + "param", broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param", broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1
        self.episode_reward = 0
        self.replay_buffer = []
        self.target_speed = 0
        self.steering = 0
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)
        self.state = np.vstack([image for x in range(FRAME_STACK)])
        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            # Temporary fix for when the replay buffer sometimes fails to send.
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:
            buffers_received = self.replay_buffer_received_sub.run()
            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run((self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations")
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]
            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()
        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)
        return False

    def run(self, image, speed=None):
        if not speed:
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            # Save the observation to the replay buffer.
            if THROTTLE_MAX == THROTTLE_MIN:
                reward = 1
            else:
                reward = 1 + (self.speed - THROTTLE_MIN) / (THROTTLE_MAX - THROTTLE_MIN)
                reward = min(reward, 2) / 2
            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_command_history = np.roll(self.command_history, 3)
            next_command_history[:3] = [self.steering, self.target_speed, self.speed]

            next_state = np.roll(self.state, IMAGE_CHANNELS)
            next_state[:IMAGE_CHANNELS, :, :] = self.agent.process_im(
                self.image, IMAGE_SIZE, RGB)

            self.replay_buffer.append([[self.state, self.command_history],
                                       [self.steering, self.target_speed],
                                       [reward],
                                       [next_state, next_command_history],
                                       [float(not done)]])
            self.episode_reward += reward
            step_end = time.time()
            self.state = next_state
            self.command_history = next_command_history
            print(f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, "
                  f"Episode reward: {self.episode_reward:.2f}, "
                  f"Step time: {(self.step_start - step_end):.2f}, "
                  f"Speed: {self.speed:.2f}, Throttle: {self.target_speed:.2f}")

        if self.step > self.max_episode_steps or (self.dead and not self.training):
            self.training_start = time.time()
            self.step = 0
            self.steering = 0
            self.target_speed = 0
            self.training = True
            self.replay_buffer = self.replay_buffer[SKIP_INITIAL_STEPS:]
            return self.steering, self.target_speed, self.training

        if self.training:
            self.training = self.train()
            self.dead = False
            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")
            self.reset(self.agent.process_im(self.image, IMAGE_SIZE, RGB))

        self.step += 1

        if self.step < SKIP_INITIAL_STEPS:
            return 0.001, 0, False

        self.step_start = time.time()

        action = self.agent.select_action((self.state, self.command_history))
        self.steering, self.target_speed = self.enforce_limits(action, self.command_history[:2])
        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount to a
        threshold value. If there are not enough black pixels the car is
        assumed to be off the track.
        """
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage

        crop = img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()

        return pixels < pixels_required

    def is_dead_sim(self, img):
        crop_height = 40
        required = 0.8

        cropped = img[-crop_height:]
        rgb = cropped[:, :, 0] > cropped[:, :, 2]
        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_action):
        """
        Scale the agent actions to the environment limits and rate-limit the
        change from the previous action.
        """
        prev_steering = prev_action[0]
        prev_throttle = prev_action[1]

        var = (THROTTLE_MAX - THROTTLE_MIN) / 2
        mu = (THROTTLE_MAX + THROTTLE_MIN) / 2
        raw_throttle = action[1] * var + mu

        steering = np.clip(action[0], prev_steering - MAX_STEERING_DIFF,
                           prev_steering + MAX_STEERING_DIFF)
        steering = np.clip(steering, STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT)
        throttle = np.clip(raw_throttle, prev_throttle - MAX_THROTTLE_DIFF,
                           prev_throttle + MAX_THROTTLE_DIFF)

        return [steering, throttle]
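# Worked example (not in the original file): how enforce_limits() scales and
# rate-limits a raw agent action against the previous command.
if __name__ == "__main__":
    agent = RL_Agent.__new__(RL_Agent)  # bypass __init__ to avoid MQTT setup
    raw_action = [0.9, 1.0]             # network output, roughly in [-1, 1]
    prev_action = [0.0, 0.1]            # last applied [steering, throttle]
    steering, throttle = agent.enforce_limits(raw_action, prev_action)
    # With e.g. THROTTLE_MIN=0.0, THROTTLE_MAX=0.5, MAX_STEERING_DIFF=0.15 and
    # MAX_THROTTLE_DIFF=0.1 this yields steering=0.15, throttle=0.2: the raw
    # throttle is scaled into [THROTTLE_MIN, THROTTLE_MAX] and both commands
    # are clipped against the previous action so the car cannot jerk.
    print(steering, throttle)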