from donkeycar.parts.network import MQTTValueSub, MQTTValuePub
from donkeycar.parts.image import JpgToImgArr


class DonkeyRemoteContoller:
    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name, broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size
        self.img = None  # last decoded frame; set on the first observe()

    def get_sensor_size(self):
        return self.sensor_size

    def wait_until_connected(self):
        pass

    def take_action(self, action):
        self.controller_pub.run(action)

    def quit(self):
        self.camera_sub.shutdown()
        self.controller_pub.shutdown()

    def get_original_image(self):
        return self.img

    def observe(self):
        jpg = self.camera_sub.run()
        self.img = self.jpgToImg.run(jpg)
        return self.img
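
# A minimal usage sketch for the class above, not part of the original file:
# the car name, broker host, and the constant action below are placeholder
# assumptions, and it presumes a car is publishing camera jpgs on that topic.
if __name__ == "__main__":
    controller = DonkeyRemoteContoller("donkey1", "mqtt.eclipse.org")
    try:
        for _ in range(100):
            img = controller.observe()          # decoded HxWx3 uint8 array
            controller.take_action([0.0, 0.1])  # [steering, throttle] placeholder
    finally:
        controller.quit()                       # shut down the MQTT client threads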
def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"): self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker) self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker) self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker) self.jpgToImg = JpgToImgArr() self.img = np.zeros((40, 40)) self.state = None self.speed = 0
def __init__(self, alg_type, sim, car_name=args.car_name): self.args = define_config() self.agent = Dreamer(self.args) self.sim = sim self.image = np.zeros((120, 160, 3)) self.observation = torch.zeros( (1, 3, 64, 64)) # init observation, with batch dim self.belief = torch.zeros(1, self.args.belief_size, device=self.args.device) self.posterior_state = torch.zeros(1, self.args.state_size, device=self.args.device) self.action = torch.zeros(1, self.args.action_size, device=self.args.device) # self.act_history = torch.zeros(1, self.args.action_size*3, device=self.args.device) self.speed = 0 self.step = 0 self.episode = 0 self.episode_reward = 0 self.replay_buffer = [] self.target_speed = 0 self.steering = 0 self.training = False self.step_start = 0 self.buffers_sent = False self.replay_buffer_pub = MQTTValuePub(car_name + "buffer", broker="mqtt.eclipse.org") self.replay_buffer_sub = MQTTValueSub(car_name + "buffer", broker="mqtt.eclipse.org", def_value=(0, True)) self.replay_buffer_received_pub = MQTTValuePub( car_name + "buffer_received", broker="mqtt.eclipse.org") self.replay_buffer_received_sub = MQTTValueSub( car_name + "buffer_received", broker="mqtt.eclipse.org", def_value=0) self.param_pub = MQTTValuePub(car_name + "param", broker="mqtt.eclipse.org") self.param_sub = MQTTValueSub(car_name + "param", broker="mqtt.eclipse.org")
def __init__(self, car_name="Kari", mqtt_broker="mqtt.eclipse.org", realsense=False, env_type="DonkeyCar"): self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name, broker=mqtt_broker) if realsense: self.state_sub = MQTTValueSub("donkey/%s/state" % car_name, broker=mqtt_broker) self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name, broker=mqtt_broker) self.jpgToImg = JpgToImgArr() self.img = np.zeros((40, 40)) self.state = None self.speed = 0 self.realsense = realsense self.sim = env_type != "DonkeyCar"
import math
from docopt import docopt
import donkeycar as dk
import cv2

from donkeycar.parts.cv import CvImageView, ImgBGR2RGB, ImgRGB2BGR, ImageScale, ImgWriter, ArrowKeyboardControls
from donkeycar.parts.salient import SalientVis
from donkeycar.parts.network import MQTTValuePub, MQTTValueSub
from donkeycar.parts.transform import Lambda
from donkeycar.parts.image import JpgToImgArr

V = dk.vehicle.Vehicle()
args = docopt(__doc__)
print(args)

V.add(MQTTValueSub(name="donkey/%s/camera" % args["--name"], broker=args["--broker"]), outputs=["jpg"])
V.add(JpgToImgArr(), inputs=["jpg"], outputs=["img_arr"])
V.add(ImgBGR2RGB(), inputs=["img_arr"], outputs=["rgb"])
V.add(ImageScale(4.0), inputs=["rgb"], outputs=["lg_img"])
V.add(CvImageView(), inputs=["lg_img"])
V.add(ArrowKeyboardControls(), outputs=["control"])
# publish controls on the same broker the camera is subscribed to
V.add(MQTTValuePub(name="donkey/%s/controls" % args["--name"], broker=args["--broker"]), inputs=["control"])

record_path = args["--record"]
if record_path is not None:

    class ImageSaver:
        def __init__(self, path):
            self.index = 0
            self.path = path
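        # The excerpt ends here and the original class body continues beyond
        # it. An assumed sketch of the missing piece (the run() signature and
        # the filename pattern are guesses, not the original code): write each
        # incoming jpg byte string to a numbered file under self.path.
        def run(self, jpg):
            if jpg is not None:
                with open("%s/frame_%06d.jpg" % (self.path, self.index), "wb") as f:
                    f.write(jpg)
                self.index += 1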
threaded = True
inputs = ['angle', 'throttle']
V.add(cam, inputs=inputs, outputs=['camera/arr'], threaded=threaded)

img_to_jpg = ImgArrToJpg()
V.add(img_to_jpg, inputs=["camera/arr"], outputs=["camera/jpg"])

pub_cam = MQTTValuePub("donkey/%s/camera" % cfg.DONKEY_UNIQUE_NAME, broker=cfg.MQTT_BROKER)
V.add(pub_cam, inputs=["camera/jpg"])

sub_controls = MQTTValueSub("donkey/%s/controls" % cfg.DONKEY_UNIQUE_NAME, def_value=(0., 0.), broker=cfg.MQTT_BROKER)
V.add(sub_controls, outputs=["angle", "throttle"])

# Alternative actuator wiring via a PCA9685 PWM board:
# steering_controller = PCA9685(cfg.STEERING_CHANNEL, cfg.PCA9685_I2C_ADDR, busnum=cfg.PCA9685_I2C_BUSNUM)
# steering = PWMSteering(controller=steering_controller,
#                        left_pulse=cfg.STEERING_LEFT_PWM,
#                        right_pulse=cfg.STEERING_RIGHT_PWM)
#
# throttle_controller = PCA9685(cfg.THROTTLE_CHANNEL, cfg.PCA9685_I2C_ADDR, busnum=cfg.PCA9685_I2C_BUSNUM)
# throttle = PWMThrottle(controller=throttle_controller,
#                        max_pulse=cfg.THROTTLE_FORWARD_PWM,
#                        zero_pulse=cfg.THROTTLE_STOPPED_PWM,
#                        min_pulse=cfg.THROTTLE_REVERSE_PWM)
#
# V.add(steering, inputs=['angle'])
import time

import numpy as np

from donkeycar.parts.network import MQTTValueSub, MQTTValuePub
from donkeycar.parts.image import JpgToImgArr


class DonkeyCarSpeed:
    def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker)
        self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0

    def observe(self):
        self.img = self.jpgToImg.run(self.camera_sub.run())
        self.state = self.state_sub.run()

    def reset(self):
        self.controller_pub.run([0, 0])
        self.observe()
        time.sleep(1)
        return self.img

    def step(self, control, step_length):
        self.controller_pub.run(control)
        time.sleep(step_length)
        self.observe()
        v = self.state["v"]
        self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5
        done = self.is_dead()
        return self.img, done

    def is_dead(self):
        """
        Counts dark pixels in a crop at the bottom of the image; if too few
        are found, the car is assumed to have left the track.
        """
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (self.img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage
        crop = self.img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()
        print("Pixels: {}, Required: {}".format(pixels, pixels_required))
        return pixels < pixels_required
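
# A worked check of the is_dead() threshold arithmetic above, assuming the
# standard 160-pixel-wide frame used elsewhere in this repo
# (sensor_size=(120, 160, 3)): the crop is 20 rows tall and
# 160 - 2 * 20 = 120 columns wide, so 10% of it is 240 pixels.
if __name__ == "__main__":
    width = 160
    crop_height, crop_width, pixels_percentage = 20, 20, 0.10
    pixels_required = (width - 2 * crop_width) * crop_height * pixels_percentage
    assert pixels_required == 240.0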
import time

import numpy as np
import torch

from donkeycar.parts.network import MQTTValuePub, MQTTValueSub

# Dreamer, define_config, args, TRAINING_TIMEOUT and BLOCK_SIZE are defined
# in the surrounding module.


class RL_Agent():
    def __init__(self, alg_type, sim, car_name=args.car_name):
        self.args = define_config()
        self.agent = Dreamer(self.args)
        self.sim = sim
        self.image = np.zeros((120, 160, 3))
        self.observation = torch.zeros((1, 3, 64, 64))  # initial observation, with batch dim
        self.belief = torch.zeros(1, self.args.belief_size, device=self.args.device)
        self.posterior_state = torch.zeros(1, self.args.state_size, device=self.args.device)
        self.action = torch.zeros(1, self.args.action_size, device=self.args.device)
        self.speed = 0
        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.target_speed = 0
        self.steering = 0
        self.training = False
        self.step_start = 0
        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer", broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer", broker="mqtt.eclipse.org", def_value=(0, True))
        self.replay_buffer_received_pub = MQTTValuePub(car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(car_name + "buffer_received", broker="mqtt.eclipse.org", def_value=0)
        self.param_pub = MQTTValuePub(car_name + "param", broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param", broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1
        self.episode_reward = 0
        self.replay_buffer = []
        self.target_speed = 0
        self.steering = 0
        self.belief = torch.zeros(1, self.args.belief_size, device=self.args.device)
        self.posterior_state = torch.zeros(1, self.args.state_size, device=self.args.device)
        self.action = torch.zeros(1, self.args.action_size, device=self.args.device)
        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            # Temporary fix for when the replay buffer sometimes fails to send.
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:
            buffers_received = self.replay_buffer_received_sub.run()
            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run((self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations")
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]
            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()
        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)
        return False

    def run(self, image, speed=None):
        # speed == 0 is a valid measurement, so test against None explicitly
        if speed is None:
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            # Save the observation to the replay buffer.
            reward = 1 + (self.speed - self.args.throttle_min) / (self.args.throttle_max - self.args.throttle_min)
            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_observation = self.agent.process_im(self.image)
            self.replay_buffer.append((next_observation, self.action.cpu(), reward, done))
            self.episode_reward += reward
            step_end = time.time()
            # observation is a tensor (3, 64, 64); image is a numpy array (120, 160, 3)
            self.observation = next_observation
            print(f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, "
                  f"Episode reward: {self.episode_reward:.2f}, Step time: {(step_end - self.step_start):.2f}, "
                  f"Speed: {self.speed:.2f}, Steering: {self.steering:.2f}")

        if self.step > self.args.max_episodes_steps or (self.dead and not self.training):
            self.training_start = time.time()
            self.step = 0
            self.steering = 0
            self.target_speed = 0
            self.training = True
            self.replay_buffer = self.replay_buffer[self.args.skip_initial_steps:]
            return self.steering, self.target_speed, self.training

        if self.training:
            self.training = self.train()
            self.dead = False
            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")
            self.reset(self.agent.process_im(self.image))

        self.step += 1
        if self.step < self.args.skip_initial_steps:
            return 0.001, 0, False

        self.step_start = time.time()

        if self.episode <= self.args.random_episodes:
            # Warm-up episodes: random steering at a fixed target speed.
            self.steering = np.random.normal(0, 1)
            self.target_speed = 0.5
            self.action = torch.tensor([[self.steering, self.target_speed]], device=self.args.device)
        else:
            with torch.no_grad():
                self.belief, self.posterior_state = self.agent.infer_state(
                    self.observation.to(self.args.device),
                    action=self.action,
                    belief=self.belief,
                    state=self.posterior_state)
                self.action = self.agent.select_action((self.belief, self.posterior_state))

            # action dim: [batch_size, act_size]
            action = self.action.cpu().numpy()
            self.steering, self.target_speed = action[0][0], action[0][1]

        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount
        to a threshold value. If there are not enough black pixels the
        car is assumed to be off the track.
        """
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage
        crop = img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()
        return pixels < pixels_required

    def is_dead_sim(self, img):
        crop_height = 40
        required = 0.8
        cropped = img[-crop_height:]
        rgb = cropped[:, :, 0] > cropped[:, :, 2]
        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_steering):
        """
        Scale the agent actions to environment limits.
        """
        var = (self.args.throttle_max - self.args.throttle_min) / 2
        mu = (self.args.throttle_max + self.args.throttle_min) / 2
        steering_min = max(self.args.steer_limit_left, prev_steering - self.args.max_steering_diff)
        steering_max = min(self.args.steer_limit_right, prev_steering + self.args.max_steering_diff)
        steering = max(steering_min, min(steering_max, action[0]))
        return np.array([steering, action[1] * var + mu], dtype=np.float32)
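
# A worked example of the enforce_limits() scaling above, with assumed config
# values (throttle range [0.0, 0.4], steering limits [-1, 1], and a per-step
# max_steering_diff of 0.15). Raw network outputs live in [-1, 1]; throttle is
# rescaled into [throttle_min, throttle_max] and steering is rate-limited
# relative to the previous step.
if __name__ == "__main__":
    throttle_min, throttle_max = 0.0, 0.4
    steer_limit_left, steer_limit_right = -1.0, 1.0
    max_steering_diff = 0.15
    raw_steering, raw_throttle, prev_steering = 0.9, 0.5, 0.0

    var = (throttle_max - throttle_min) / 2  # 0.2
    mu = (throttle_max + throttle_min) / 2   # 0.2
    steering_min = max(steer_limit_left, prev_steering - max_steering_diff)
    steering_max = min(steer_limit_right, prev_steering + max_steering_diff)
    steering = max(steering_min, min(steering_max, raw_steering))

    assert abs(steering - 0.15) < 1e-9                # rate-limited toward prev_steering
    assert abs(raw_throttle * var + mu - 0.3) < 1e-9  # rescaled into [0.0, 0.4]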
import time

import numpy as np

from donkeycar.parts.network import MQTTValueSub, MQTTValuePub
from donkeycar.parts.image import JpgToImgArr


class DonkeyRemoteEnv:
    def __init__(self, car_name="Kari", mqtt_broker="mqtt.eclipse.org", realsense=False, env_type="DonkeyCar"):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name, broker=mqtt_broker)
        if realsense:
            self.state_sub = MQTTValueSub("donkey/%s/state" % car_name, broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        self.realsense = realsense
        self.sim = env_type != "DonkeyCar"

    def observe(self):
        self.img = self.jpgToImg.run(self.camera_sub.run())
        if self.realsense:
            self.state = self.state_sub.run()
            v = self.state["v"]
            self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5

    def reset(self):
        self.controller_pub.run([0, 0])
        self.observe()
        time.sleep(2)
        return self.img

    def step(self, control, step_length):
        print(control)
        self.controller_pub.run(control)
        time.sleep(step_length)
        self.observe()
        done = self.is_dead_sim() if self.sim else self.is_dead_real()
        return self.img, done

    def is_dead_real(self):
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (self.img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage
        crop = self.img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()
        print("Pixels: {}, Required: {}".format(pixels, pixels_required))
        return pixels < pixels_required

    def is_dead_sim(self):
        crop_height = 40
        required = 0.8
        cropped = self.img[-crop_height:]
        rgb = cropped[:, :, 0] > cropped[:, :, 2]
        return rgb.sum() / (crop_height * 160) > required
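
# A minimal rollout sketch for the environment above, not part of the original
# file (car name, broker, and the constant control are placeholder assumptions):
if __name__ == "__main__":
    env = DonkeyRemoteEnv(car_name="Kari", mqtt_broker="mqtt.eclipse.org")
    img = env.reset()
    done = False
    while not done:
        # publish [steering, throttle], hold it for 0.1 s, then observe
        img, done = env.step([0.0, 0.2], step_length=0.1)
    env.controller_pub.run([0, 0])  # stop the car when the episode ends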
import donkeycar as dk

from donkeycar.parts.camera import PiCamera
from donkeycar.parts.image import ImgArrToJpg
from donkeycar.parts.network import MQTTValuePub, MQTTValueSub
from donkeycar.parts.actuator import ServoBlaster, PWMSteering, Mini_HBridge_DC_Motor_PWM

# cfg is the donkeycar config object loaded by the surrounding drive script.

V = dk.Vehicle()
print("starting up", cfg.DONKEY_UNIQUE_NAME, "for remote management.")

cam = PiCamera(image_w=cfg.IMAGE_W, image_h=cfg.IMAGE_H, image_d=cfg.IMAGE_DEPTH)
V.add(cam, outputs=["camera/arr"], threaded=True)

img_to_jpg = ImgArrToJpg()
V.add(img_to_jpg, inputs=["camera/arr"], outputs=["camera/jpg"])

pub_cam = MQTTValuePub("donkey/%s/camera" % cfg.DONKEY_UNIQUE_NAME)
V.add(pub_cam, inputs=["camera/jpg"])

sub_controls = MQTTValueSub("donkey/%s/controls" % cfg.DONKEY_UNIQUE_NAME, def_value=(0., 0.))
V.add(sub_controls, outputs=["angle", "throttle"])

steering_controller = ServoBlaster(cfg.STEERING_CHANNEL)

# PWM pulse values should be in the range of 100 to 200.
assert (cfg.STEERING_LEFT_PWM <= 200)
assert (cfg.STEERING_RIGHT_PWM <= 200)
steering = PWMSteering(controller=steering_controller,
                       left_pulse=cfg.STEERING_LEFT_PWM,
                       right_pulse=cfg.STEERING_RIGHT_PWM)

motor = Mini_HBridge_DC_Motor_PWM(cfg.HBRIDGE_PIN_FWD, cfg.HBRIDGE_PIN_BWD)

V.add(steering, inputs=['angle'])
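
# The excerpt ends here. A drive script like this would normally finish by
# adding the motor part and starting the vehicle loop; a sketch of that ending
# (the loop rate below is an assumed value, not from the original file):
#
#     V.add(motor, inputs=['throttle'])
#     V.start(rate_hz=20)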
import time

import numpy as np

from donkeycar.parts.network import MQTTValuePub, MQTTValueSub

# AE_SAC, args, PARAMS and the upper-case constants below are defined in the
# surrounding module.


class RL_Agent():
    def __init__(self, alg_type, sim, car_name=args.car_name,
                 encoder_update=args.encoder_update,
                 max_episode_steps=MAX_EPISODE_STEPS):
        if encoder_update == "pixel":
            print("Using Pixel encoder")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = False
        if encoder_update == "ae":
            print("Using Autoencoder loss")
            PARAMS["sac"]["encoder_critic_loss"] = False
            PARAMS["sac"]["encoder_ae_loss"] = True
        if encoder_update == "aesac":
            print("Using autoencoder and critic loss")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = True

        self.agent = AE_SAC(PARAMS)
        self.sim = sim
        self.image = np.zeros((120, 160, 3))
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)
        self.state = np.vstack([
            self.agent.process_im(self.image, IMAGE_SIZE, RGB)
            for x in range(FRAME_STACK)
        ])
        self.speed = 0
        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.max_episode_steps = max_episode_steps
        self.target_speed = 0
        self.steering = 0
        self.training = False
        self.step_start = 0
        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer", broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer", broker="mqtt.eclipse.org", def_value=(0, True))
        self.replay_buffer_received_pub = MQTTValuePub(car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(car_name + "buffer_received", broker="mqtt.eclipse.org", def_value=0)
        self.param_pub = MQTTValuePub(car_name + "param", broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param", broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1
        self.episode_reward = 0
        self.replay_buffer = []
        self.target_speed = 0
        self.steering = 0
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)
        self.state = np.vstack([image for x in range(FRAME_STACK)])
        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            # Temporary fix for when the replay buffer sometimes fails to send.
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:
            buffers_received = self.replay_buffer_received_sub.run()
            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run((self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations")
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]
            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()
        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)
        return False

    def run(self, image, speed=None):
        # speed == 0 is a valid measurement, so test against None explicitly
        if speed is None:
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            # Save the observation to the replay buffer.
            if THROTTLE_MAX == THROTTLE_MIN:
                reward = 1
            else:
                reward = 1 + (self.speed - THROTTLE_MIN) / (THROTTLE_MAX - THROTTLE_MIN)
            reward = min(reward, 2) / 2

            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_command_history = np.roll(self.command_history, 3)
            next_command_history[:3] = [self.steering, self.target_speed, self.speed]

            next_state = np.roll(self.state, IMAGE_CHANNELS)
            next_state[:IMAGE_CHANNELS, :, :] = self.agent.process_im(self.image, IMAGE_SIZE, RGB)

            self.replay_buffer.append([[self.state, self.command_history],
                                       [self.steering, self.target_speed],
                                       [reward],
                                       [next_state, next_command_history],
                                       [float(not done)]])
            self.episode_reward += reward
            step_end = time.time()
            self.state = next_state
            self.command_history = next_command_history
            print(f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, "
                  f"Episode reward: {self.episode_reward:.2f}, Step time: {(step_end - self.step_start):.2f}, "
                  f"Speed: {self.speed:.2f}, Throttle: {self.target_speed:.2f}")

        if self.step > self.max_episode_steps or (self.dead and not self.training):
            self.training_start = time.time()
            self.step = 0
            self.steering = 0
            self.target_speed = 0
            self.training = True
            self.replay_buffer = self.replay_buffer[SKIP_INITIAL_STEPS:]
            return self.steering, self.target_speed, self.training

        if self.training:
            self.training = self.train()
            self.dead = False
            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")
            self.reset(self.agent.process_im(self.image, IMAGE_SIZE, RGB))

        self.step += 1
        if self.step < SKIP_INITIAL_STEPS:
            return 0.001, 0, False

        self.step_start = time.time()

        action = self.agent.select_action((self.state, self.command_history))
        self.steering, self.target_speed = self.enforce_limits(action, self.command_history[:2])
        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount
        to a threshold value. If there are not enough black pixels the
        car is assumed to be off the track.
        """
        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10
        pixels_required = (img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage
        crop = img[-crop_height:, crop_width:-crop_width]
        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold
        pixels = (r & g & b).sum()
        return pixels < pixels_required

    def is_dead_sim(self, img):
        crop_height = 40
        required = 0.8
        cropped = img[-crop_height:]
        rgb = cropped[:, :, 0] > cropped[:, :, 2]
        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_action):
        """
        Scale the agent actions to environment limits.
        """
        prev_steering = prev_action[0]
        prev_throttle = prev_action[1]

        var = (THROTTLE_MAX - THROTTLE_MIN) / 2
        mu = (THROTTLE_MAX + THROTTLE_MIN) / 2
        raw_throttle = action[1] * var + mu

        steering = np.clip(action[0], prev_steering - MAX_STEERING_DIFF, prev_steering + MAX_STEERING_DIFF)
        steering = np.clip(steering, STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT)
        throttle = np.clip(raw_throttle, prev_throttle - MAX_THROTTLE_DIFF, prev_throttle + MAX_THROTTLE_DIFF)

        return [steering, throttle]
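
# A worked example of the reward shaping in run() above, with assumed constants
# THROTTLE_MIN = 0.0 and THROTTLE_MAX = 0.4: driving at full target speed earns
# 1.0 per step, standing still earns 0.5, and leaving the track multiplies the
# step's reward by -10.
if __name__ == "__main__":
    THROTTLE_MIN, THROTTLE_MAX = 0.0, 0.4

    def step_reward(speed, dead):
        reward = 1 + (speed - THROTTLE_MIN) / (THROTTLE_MAX - THROTTLE_MIN)
        reward = min(reward, 2) / 2
        return reward * -10 if dead else reward

    assert step_reward(0.4, dead=False) == 1.0
    assert step_reward(0.0, dead=False) == 0.5
    assert step_reward(0.4, dead=True) == -10.0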