def get_observations(num_samples, seed):
    env = DuckietownEnv(
        map_name='map1',
        domain_rand=False,
        draw_bbox=False,
        max_steps=2000,
        seed=seed,
    )
    env = ResizeWrapper(env, shape=(64, 80, 3))
    env = NormalizeWrapper(env)

    samples = num_samples
    time_steps = 0
    obs_array = np.zeros(shape=(samples, 64, 80, 3))

    while time_steps < samples:  # collect trajectories
        env.seed(seed)
        obs = env.reset()
        obs_array[time_steps] = obs
        time_steps += 1
        while True:
            action = env.action_space.sample()
            obs, reward, done, _ = env.step(action)
            rollout_done = done or time_steps >= samples
            if rollout_done:
                break
            obs_array[time_steps] = obs
            time_steps += 1
        seed += 1
        print(time_steps)

    return obs_array
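A minimal usage sketch for get_observations, assuming gym-duckietown plus the local ResizeWrapper/NormalizeWrapper modules are importable; the output file name is a placeholder:

import numpy as np

# Collect 500 observations from random-action rollouts, one seed per rollout.
obs_batch = get_observations(num_samples=500, seed=0)
assert obs_batch.shape == (500, 64, 80, 3)

# Persist the batch, e.g. for offline encoder training (hypothetical path).
np.save('duckietown_obs.npy', obs_batch)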
def __init__(self):
    from gym_duckietown.envs.duckietown_env import DuckietownEnv

    # map_name, frame_skip, distortion and max_steps are expected to be in
    # scope here (constructor arguments elided in this excerpt).
    args = {
        'map_name': map_name,
        'frame_skip': frame_skip,
        'distortion': distortion,
        'max_steps': max_steps,
    }
    # Forward only the arguments that were explicitly set; DuckietownEnv
    # keeps its own defaults for the rest.
    self.env = DuckietownEnv(**{k: v for k, v in args.items() if v is not None})
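The dict comprehension above is a small idiom worth noting: only non-None settings are forwarded, so the environment's own defaults survive. A standalone illustration (the values here are made up):

defaults = {'map_name': 'udem1', 'frame_skip': None,
            'distortion': None, 'max_steps': 500}
kwargs = {k: v for k, v in defaults.items() if v is not None}
print(kwargs)  # {'map_name': 'udem1', 'max_steps': 500}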
def creator():
    if args.env_name is None:
        env = DuckietownEnv(
            map_name=args.map_name,
            draw_curve=args.draw_curve,
            draw_bbox=args.draw_bbox,
            domain_rand=args.domain_rand,
            frame_skip=args.frame_skip,
            distortion=args.distortion,
        )
    else:
        env = gym.make(args.env_name)
    return DiscreteWrapper(env) if discrete else env
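A factory like creator is typically handed to a vectorized-environment helper so each worker builds its own instance; a sketch assuming Stable-Baselines3 (used elsewhere in this section) and that args and discrete are already in scope:

from stable_baselines3.common.env_util import make_vec_env

# Each of the 4 workers calls creator() to build an independent environment.
vec_env = make_vec_env(creator, n_envs=4)
obs = vec_env.reset()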
def train(args, agent_opts, train_opts, rewards=None):
    duckie.logger.disabled = True  # Disable log messages from duckietown

    env = DuckietownEnv(
        seed=None,
        map_name="4way_bordered",
        max_steps=500000,
        draw_curve=False,
        draw_bbox=False,
        domain_rand=False,
        randomize_maps_on_reset=False,
        accept_start_angle_deg=4,
        full_transparency=False,
        user_tile_start=None,
        num_tris_distractors=12,
        enable_leds=False,
        navigation=True,
        num_past_navdirs=3,
        num_past_positions=3,
        num_past_actions=2,
    )

    # Load encoder
    encoder = BetaVAE_H(10, 3)
    loaded_model = torch.load(args.encoder_path)
    encoder.load_state_dict(loaded_model['model_states']['net'])

    env = ResizeWrapper(env, 64, 64)
    env = SwapDimensionsWrapper(env)
    env = ImageNormalizeWrapper(env)
    env = TorchifyWrapper(env, agent_opts.use_gpu)
    env = EncoderWrapper(env, encoder, agent_opts.use_gpu)
    # env = DtRewardWrapper(env)
    env = ActionWrapper(env)
    env = GymEnvironment(env)

    state_size = 31  # VAE bottleneck plus the additional state information
    act_size = env.gym_env.action_space.shape[0]
    action_def = ContinuousDefinition(env.gym_env.action_space.shape,
                                      env.gym_env.action_space.high,
                                      env.gym_env.action_space.low)

    multihead_net = DuckieNetwork(state_size, act_size)
    agent = SACAgent(multihead_net, action_def, agent_opts)
    trainer = trn.Trainer(agent, env, train_opts)
    trainer.train()
def launch_env(id=None):
    env = None
    if id is None:
        # Launch the environment
        # from gym_duckietown.simulator import Simulator
        from gym_duckietown.envs.duckietown_env import DuckietownEnv
        # env = Simulator(
        env = DuckietownEnv(
            seed=123,  # random seed
            map_name="loop_empty",
            max_steps=500001,  # we don't want the gym to reset itself
            domain_rand=0,
            camera_width=640,
            camera_height=480,
            accept_start_angle_deg=4,  # start close to straight
            full_transparency=True,
            distortion=True,
        )
    else:
        env = gym.make(id)
    return env
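A short smoke test for launch_env, assuming gym-duckietown is installed; it runs one random-action episode and reports the return:

env = launch_env()
obs = env.reset()
done, total_reward = False, 0.0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
print('episode return:', total_reward)
env.close()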
from stable_baselines3.common.vec_env import VecNormalize, VecFrameStack, VecTransposeImage
from global_configuration import PROJECT_PATH
import json

# The 2 lines below are for a Windows 10 environment. Comment them out on other OSes.
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

results_dir = osp.join(PROJECT_PATH, "results", "ppo", "2021-04-13_ppo", "2021-04-13_15-49-19_ppo")
with open(osp.join(results_dir, "config.json"), 'r') as f:
    custom_params = json.load(f)

env = DuckietownEnv(map_name='map3',
                    domain_rand=False,
                    draw_bbox=False,
                    max_steps=1500,
                    seed=1)
env = RewardWrapper(env)
env = ResizeWrapper(env, shape=(64, 80, 3))
if custom_params['algo'] == 'dqn':
    env = DiscreteWrapper(env)
if custom_params['USING_VAE']:
    env = NormalizeWrapper(env)  # Normalization is only needed for VAE latents, not raw images
    env = FinalLayerObservationWrapper(env, latent_dim=1028, map="map3")

# Step 3.b. Make a vectorized environment to be able to use Normalize or FrameStack (optional)
env = make_vec_env(lambda: env, n_envs=1)
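The VecNormalize and VecFrameStack imports above only apply once the environment is vectorized; a hedged continuation, assuming config.json carries the same FRAME_STACK and USING_NORMALIZATION keys as the training script further below:

if custom_params.get('FRAME_STACK', 1) > 1:
    env = VecFrameStack(env, n_stack=custom_params['FRAME_STACK'])
if custom_params.get('USING_NORMALIZATION', False):
    env = VecNormalize(env, norm_obs=True, norm_reward=False)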
        self.observation_space.high[0, 0, 0],
        [obs_shape[2], obs_shape[0], obs_shape[1]],
        dtype=self.observation_space.dtype)

    def observation(self, observation):
        return observation.transpose(2, 0, 1)


class GTDenseRewardInfoWrapperDT(gym.Wrapper):
    def gt_reward(self):
        lane_pose = self.env.get_lane_pos2(self.env.cur_pos, self.env.cur_angle)
        dist = lane_pose.dist  # Distance to lane center; left is negative, right is positive.
        angle = angleLimit(lane_pose.angle_rad)
        gt_reward = state2reward(dist, angle)
        return gt_reward

    def step(self, action):
        observation, reward, done, info = self.env.step(action)
        info['GT_reward'] = self.gt_reward()
        return observation, reward, done, info


#### Action wrappers

if __name__ == '__main__':
    sim = DTNoisyLaneFollowingRewardWrapper(DuckietownEnv(), 10)
    print('Environment loaded!')
    image = sim.render(mode='drone')
    input()
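A brief usage sketch for GTDenseRewardInfoWrapperDT, assuming DuckietownEnv and the helper functions above are importable; the ground-truth dense reward rides along in the info dict without altering the env's own reward:

env = GTDenseRewardInfoWrapperDT(DuckietownEnv(map_name='loop_empty'))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print('env reward:', reward, '| GT dense reward:', info['GT_reward'])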
parser.add_argument('--env-name', default=None)
parser.add_argument('--map-name', default='udem1')
parser.add_argument('--distortion', default=False, action='store_true')
parser.add_argument('--draw-curve', action='store_true', help='draw the lane following curve')
parser.add_argument('--draw-bbox', action='store_true', help='draw collision detection bounding boxes')
parser.add_argument('--domain-rand', action='store_true', help='enable domain randomization')
parser.add_argument('--frame-skip', default=1, type=int, help='number of frames to skip')
args = parser.parse_args()

if args.env_name is None:
    env = DuckietownEnv(
        map_name="loop_empty",
        max_steps=500001,  # we don't want the gym to reset itself
        domain_rand=0,
        camera_width=640,
        camera_height=480,
        accept_start_angle_deg=1,  # start close to straight
        accept_start_dist=0.05,
        full_transparency=False,
        distortion=False,
    )
else:
    env = gym.make(args.env_name)

env.reset()
env.render()


@env.unwrapped.window.event
def on_key_press(symbol, modifiers):
if platform == 'win32':
    import os
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Training on Linux: start a virtual display if a GPU is available
if platform == 'linux' and torch.cuda.is_available():
    from pyvirtualdisplay import Display
    display = Display(visible=0, size=(640, 480))
    display.start()

PROJECT_PATH = osp.abspath(osp.dirname(osp.dirname(__file__)))

# Step 1. Initialize the environment
env = DuckietownEnv(map_name='map3',
                    domain_rand=False,
                    draw_bbox=False,
                    max_steps=1500,
                    seed=2)

# Step 2. Check the custom environment. Must be done before applying any wrappers
# check_env(env)

# Step 3. Initialize some parameters
custom_params = {
    'USING_VAE': False,  # whether to use a VAE
    'FRAME_STACK': 4,
    'USING_NORMALIZATION': True,
    'algo': 'ppo',
    'sac_parameters': {
        'buffer_size': int(1e5),
        'gradient_steps': 64,
def test(args):
    duckie.logger.disabled = True  # Disable log messages from duckietown

    env = DuckietownEnv(
        seed=None,
        map_name="4way_bordered",
        max_steps=500001,
        draw_curve=False,
        draw_bbox=False,
        domain_rand=False,
        randomize_maps_on_reset=False,
        accept_start_angle_deg=4,
        full_transparency=True,
        user_tile_start=None,
        num_tris_distractors=12,
        enable_leds=False,
    )

    # Load encoder
    encoder = BetaVAE_H(10, 3)
    loaded_model = torch.load(args.encoder_path)
    encoder.load_state_dict(loaded_model['model_states']['net'])

    env = ResizeWrapper(env, 64, 64)
    env = SwapDimensionsWrapper(env)
    env = ImageNormalizeWrapper(env)
    env = TorchifyWrapper(env)
    env = EncoderWrapper(env, encoder)
    # env = ActionWrapper(env)
    env = GymEnvironment(env)

    state_size = 14
    act_size = env.gym_env.action_space.shape[0]
    action_def = ContinuousDefinition(env.gym_env.action_space.shape,
                                      env.gym_env.action_space.high,
                                      env.gym_env.action_space.low)

    multihead_net = DuckieNetwork(state_size, act_size)
    agent = SACAgent(multihead_net, action_def)
    agent.load_model(args.model_path)

    for i in range(args.n_episodes):
        total_reward = 0
        state = env.reset()
        state = torch.from_numpy(state).float().unsqueeze(0)
        while True:
            action = agent.act(state, evaluation=True)
            print(action)
            next_state, reward, done, _ = env.step(action)
            state = torch.from_numpy(next_state).float().unsqueeze(0)
            total_reward += reward
            env.render()
            if done:
                print("Total reward: {}".format(total_reward))
                break