def step(self, action): tic = time.time() # 1. Take action in the simulator based on the agents action choice self.do_action(action) # 2: Update the environment state variables after the action takes place self.pset_simulator_state_info() # 3: Calculate reward reward, done = self.calc_reward() self.time_to_step = time.time() - tic print('Time to step: ', self.time_to_step) # Send the data collected off to gui self.send_to_gui(action, reward, done) if self.mode == "inertial": return (self.current_inertial_state, reward, done, self.extra_metadata) elif self.mode == "rgb": # Reduce dimentionality of obs self.obs4 = trim_append_state_vector(self.obs4, self.images_rgb[:,:,self.image_mask_rgb], pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, reward, done, self.extra_metadata) elif self.mode == "rgb_normal": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2rgbs_normal(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, reward, done, self.extra_metadata) elif self.mode == "rgba": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2rgbs_normal(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, reward, done, self.extra_metadata) elif self.mode == "gray": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, reward, done, self.extra_metadata) elif self.mode == "gray_normal": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays() / 255, pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, reward, done, self.extra_metadata) elif self.mode == "both_rgb": self.obs4 = trim_append_state_vector(self.obs4, self.images_rgb[:,:, self.image_mask_rgb], pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata) elif self.mode == "both_rgb_normal": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2rgbs_normal(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata) elif self.mode == "both_rgba": self.obs4 = trim_append_state_vector(self.obs4, self.images_rgba[:,:, self.image_mask_rgba], pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata) elif self.mode == "both_gray": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata) elif self.mode == "both_gray_normal": self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays()/ 255, pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata) else: print("invalid Mode!")
def train_racing_car(primaryNetwork, targetNetwork, env, directory, NUM_EPISODES=1000, EPISODE_ITERATIONS_MAX=100, COPY_PERIOD=50, EPSILON=1): # Set up GUI Video Feeder vehicle_names = ["Car1"] parentImgConn, childImgConn = mp.Pipe() parentStateConn, childStateConn = mp.Pipe() app = AirSimGUI.CarGUI(parentStateConn, parentImgConn, vehicle_names=vehicle_names, num_video_feeds=IMG_VIEWS * IMG_STEP, isRGB=False) COPY_MODEL_FLAG = False copy_model_iterator = 0 for episode in range(NUM_EPISODES): print('Reset racing!', "Episode: ", episode) env.client.simPause(True) # Decay Epsilon to Greedy-Like ep = EPSILON / np.sqrt(episode + 1) if ep < .02: ep = .02 # Episode iterator, copy model iterator and flag signaling to copy parameters or restart episode episode_iterator = 0 DONE_FLAG = False # Train the model more now between rounds so delay is less #primaryNetwork.train(targetNetwork, 20) # num times # Save Model Weights if (episode + 1) % 10 == 0: primaryNetwork.save_session(directory) # Reset s*tates for the next training round obs4 = np.zeros( (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS * IMG_STEP * IMG_VIEWS)) env.client.simPause(False) # Returns inertial state vector, the image observation, and meta data (Time ellapsed) car_state, obs, meta = env.reset() env.client.simPause(True) # Copy model if we have reached copy_period rounds. if COPY_MODEL_FLAG: print('Copying model') targetNetwork.copy_from(primaryNetwork) copy_model_iterator = 0 COPY_MODEL_FLAG = False print('Copying model complete!') env.client.simPause(False) #print(obs.shape, obs4.shape) obs4 = trim_append_state_vector(obs4, obs, pop_index=IMG_VIEWS * IMG_CHANNELS) # Return and Loss episode_return = 0 episode_loss = 0 while not DONE_FLAG and episode_iterator < EPISODE_ITERATIONS_MAX: episode_iterator += 1 tic = time.time() # Sample action action = primaryNetwork.sample_action(obs4, ep) # Update Stacked State prev_obs4 = obs4 if episode_iterator < 10: state, obs, reward, DONE_FLAG, meta = env.step( env.action_num("Throttle Up")) # new single observation else: state, obs, reward, DONE_FLAG, meta = env.step( action) # new single observation # Pause Sim env.client.simPause(True) # Reduce dimentionality of obs obs4 = trim_append_state_vector( obs4, obs, pop_index=IMG_VIEWS * IMG_CHANNELS) # pop old and append new state obsv # Add new reward to running episode return episode_return += reward # Save new experience and train primaryNetwork.add_experience(prev_obs4, action, reward, obs4, DONE_FLAG) loss = primaryNetwork.train( targetNetwork) # train by using the training model # Append Loss if loss is not None: # Meaning, we have enough training exmaples to actually start training.. episode_loss += loss # Increment Copy Model Iterator copy_model_iterator += 1 # Signal a copy to the training model if we reach copy_period timesteps if copy_model_iterator % COPY_PERIOD == 0: COPY_MODEL_FLAG = True # Send off to the GUI Process! tic2 = time.time() d1 = dict.fromkeys(vehicle_names, np.array(obs4 * 255, dtype=np.uint8)) d2 = dict.fromkeys(vehicle_names, state) childImgConn.send(d1) # Make it a image format (unsigned integers) childStateConn.send(d2) print("GUI Process Update Time: ", time.time() - tic2) # Print Output #print(state) print("Episode: ", episode, ", Iteration: ", episode_iterator, ", Reward: ", reward, ", Loss: ", loss, ", Action: ", env.action_name(action), ", Ellasped Time: ", time.time() - tic) env.client.simPause(False) # Add Excel and GUI Listeners to Loop print("Total Episode Return: ", episode_return, ", Total Episode Loss: ", episode_loss)
def train_racing_drones(_vehicle_names, primaryNetwork, targetNetwork, env, directory, NUM_EPISODES=1000, EPISODE_ITERATIONS_MAX=100, COPY_PERIOD=200, EPSILON=1): # Set up GUI Video Feeder vehicle_names = _vehicle_names parentImgConn, childImgConn = mp.Pipe() parentStateConn, childStateConn = mp.Pipe() app = AirSimGUI.QuadcopterGUI(parentStateConn, parentImgConn, vehicle_names=vehicle_names, num_video_feeds=IMG_VIEWS * IMG_STEP, isRGB=False) COPY_MODEL_FLAG = False copy_model_iterator = 0 for episode in range(NUM_EPISODES): print('Reset racing!', "Episode: ", episode) # Decay Epsilon to Greedy-Like ep = EPSILON / np.sqrt(episode + 1) if ep < .02: ep = .02 # Episode iterator, copy model iterator and flag signaling to copy parameters or restart episode episode_iterator = 0 DONE_FLAGs = dict.fromkeys(vehicle_names, False) env.client.simPause(True) # Copy model if we have reached copy_period rounds. if COPY_MODEL_FLAG: print('Copying model') targetNetwork.copy_from(primaryNetwork.CDQN) copy_model_iterator = 0 COPY_MODEL_FLAG = False print('Copying model complete!') time.sleep(1) env.client.simPause(False) # Train the model more now between rounds so delay is less #primaryNetwork.train(targetNetwork, 20) # num times # Save Model Weights if (episode + 1) % 10 == 0: primaryNetwork.save_session(directory) # Reset s*tates for the next training round obs4s = dict.fromkeys( vehicle_names, np.zeros( (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS * IMG_STEP * IMG_VIEWS))) # Returns inertial state vector, the image observation, and meta data (Time ellapsed) states, obss, metas = env.reset() #print(obs.shape, obs4.shape) for vn in vehicle_names: obs4s[vn] = trim_append_state_vector(obs4s[vn], obss[vn], pop_index=IMG_VIEWS * IMG_CHANNELS) # Return and Loss episode_return = 0 episode_loss = 0 while not np.sum(np.array( list(DONE_FLAGs.values()), dtype=np.int)) and episode_iterator < EPISODE_ITERATIONS_MAX: episode_iterator += 1 tic = time.time() # Sample quad actions actions = primaryNetwork.sample_actions(obs4s, ep) # Update Stacked States prev_obs4s = obs4s states, obss, rewards, DONE_FLAGs, metas = env.steps( actions) # new single observation # Pause Sim env.client.simPause(True) # Stack New Obsvs for vn in vehicle_names: obs4s[vn] = trim_append_state_vector( obs4s[vn], obss[vn], pop_index=IMG_VIEWS * IMG_CHANNELS) # pop old and append new state obsv # Add new rewards to running episode return episode_return += np.sum( np.array([rewards[vn] for vn in vehicle_names])) # Save new experience and train primaryNetwork.add_experiences(prev_obs4s, actions, rewards, obs4s, DONE_FLAGs) loss = primaryNetwork.train( targetNetwork) # train by using the training model # Append Loss if loss is not None: # Meaning, we have enough training exmaples to actually start training.. episode_loss += loss # Increment Copy Model Iterator copy_model_iterator += 1 # Signal a copy to the training model if we reach copy_period timesteps if copy_model_iterator % COPY_PERIOD == 0: COPY_MODEL_FLAG = True # Send off to the GUI Process! tic2 = time.time() gui_obs = dict.fromkeys(vehicle_names) for vn in vehicle_names: gui_obs[vn] = np.array(obs4s[vn] * 255, dtype=np.uint8) childImgConn.send( gui_obs) # Make it a image format (unsigned integers) childStateConn.send(states) print("GUI Process Update Time: ", time.time() - tic2) # Print Output #print(rewards,actions,DONE_FLAGs) print("Episode: ", episode, ", Iteration: ", episode_iterator, ", Rewards: ", rewards[vehicle_names[0]], ", Loss: ", loss, ", Action Dron 0: ", actions[vehicle_names[0]], ", Ellasped Time: ", time.time() - tic) env.client.simPause(False) # Play # Add Excel and GUI Listeners to Loop print("Total Episode Return: ", episode_return, ", Total Episode Loss: ", episode_loss)
def reset(self): # Reset the Car print('Reseting Car') self.client.reset() self.client.confirmConnection() self.client.enableApiControl(True) print('Reset Complete') # Start Timer for episode's first step: #state = self.client.getCarState() state = self.client.simGetGroundTruthKinematics() self.initial_position = (state['position']['x_val'], state['position']['y_val'], state['position']['z_val']) self.initial_velocity = (state['linear_velocity']['x_val'], state['linear_velocity']['y_val'], state['linear_velocity']['z_val']) # Set the environment state and image properties from the simulator self.pset_simulator_state_info() # Reset throttle break and steer self.throttle = 0 self.steering = 0 self.brake = 0 self.extra_metadata = { 'action': 0, 'action_name': 0, 'env_state': { 'resetting': True, 'running': False }, 'mode': 0, 'reward': 0, 'done': 0, 'times': { 'act_time': 0, 'sim_img_time': 0, 'sim_state_time': 0, 'reward_time': 0, 'step_time': 0 } } if self.mode == "inertial": return (self.current_inertial_state, self.extra_metadata) elif self.mode == "rgb": # Reduce dimentionality of obs self.obs4 = trim_append_state_vector( self.obs4, self.images_rgb[:, :, self.image_mask_rgb], pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "rgb_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2rgbs_normal(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "rgba": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2rgbs_normal(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "gray": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "gray_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays() / 255, pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "both_rgb": self.obs4 = trim_append_state_vector( self.obs4, self.images_rgb[:, :, self.image_mask_rgb], pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_rgb_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2rgbs_normal(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_rgba": self.obs4 = trim_append_state_vector( self.obs4, self.images_rgba[:, :, self.image_mask_rgba], pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_gray": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_gray_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays() / 255, pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) else: print("invalid Mode!")
def reset(self): # Reset the Copter print('Reseting Quad') self.client.reset() self.client.confirmConnection() self.client.enableApiControl(True) #self.client.simSetVehiclePose(Vector3r(0,0,-2), ignore_collison = True) # Quickly raise the quadcopter to a few meters -- Speed up for i in range(8): state = self.client.simGetGroundTruthKinematics() quad_vel = state['linear_velocity'] quad_offset = (0, 0, -self.scaling_factor) self.client.moveByVelocityAsync(quad_vel['x_val'] + quad_offset[0], quad_vel['y_val'] + quad_offset[1], quad_vel['z_val'] + quad_offset[2], self.action_duration) time.sleep(self.action_duration) state = self.client.simGetGroundTruthKinematics() self.initial_position = (state['position']['x_val'], state['position']['y_val'], state['position']['z_val'] * -1) self.initial_velocity = (state['linear_velocity']['x_val'], state['linear_velocity']['y_val'], state['linear_velocity']['z_val']) print("Initial Quad Position: ", self.initial_position) print('Reset Complete') # Set the environment state and image properties from the simulator self.pset_simulator_state_info() self.extra_metadata = { 'action': 0, 'action_name': 0, 'env_state': { 'resetting': True, 'running': False }, 'mode': 0, 'reward': 0, 'done': 0, 'times': { 'act_time': 0, 'sim_img_time': 0, 'sim_state_time': 0, 'reward_time': 0, 'step_time': 0 } } if self.mode == "inertial": return (self.current_inertial_state, self.extra_metadata) elif self.mode == "rgb": # Reduce dimentionality of obs self.obs4 = trim_append_state_vector( self.obs4, self.images_rgb[:, :, self.image_mask_rgb], pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "rgb_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2rgbs_normal(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "rgba": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2rgbs_normal(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "gray": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "gray_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays() / 255, pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.obs4, self.extra_metadata) elif self.mode == "both_rgb": self.obs4 = trim_append_state_vector( self.obs4, self.images_rgb[:, :, self.image_mask_rgb], pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_rgb_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2rgbs_normal(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_rgba": self.obs4 = trim_append_state_vector( self.obs4, self.images_rgba[:, :, self.image_mask_rgba], pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_gray": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays(), pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) elif self.mode == "both_gray_normal": self.obs4 = trim_append_state_vector( self.obs4, self.rgbs2grays() / 255, pop_index=self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv return (self.current_inertial_state, self.obs4, self.extra_metadata) else: print("invalid Mode!")