import numpy as np
import torch

# Launch the env with our helper function
env = launch_env()

# Wrappers
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
env = ActionWrapper(env)
# env = DtRewardWrapper(env)  # not during testing

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")
policy.load(file_name, directory="./pytorch_models")

with torch.no_grad():
    while True:
        obs = env.reset()
        env.render()
        rewards = []
        while True:
            action = policy.predict(np.array(obs))
            obs, rew, done, misc = env.step(action)
            rewards.append(rew)
            env.render()
            if done:
                break
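# --- Hedged sketch: the observation wrappers are not shown in this listing.
# The classes below are illustrative assumptions only; they follow the usage
# above (pixel observations scaled to [0, 1] and transposed from HxWxC to
# CxHxW for PyTorch). ResizeWrapper/ActionWrapper would be analogous
# gym.ObservationWrapper / gym.ActionWrapper subclasses.
import gym
import numpy as np


class NormalizeWrapper(gym.ObservationWrapper):
    """Scale uint8 pixel observations into the [0, 1] range (assumed behaviour)."""

    def observation(self, observation):
        return observation.astype(np.float32) / 255.0


class ImgWrapper(gym.ObservationWrapper):
    """Transpose observations from HxWxC to CxHxW so they fit a PyTorch CNN."""

    def __init__(self, env):
        super().__init__(env)
        h, w, c = self.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(c, h, w), dtype=np.float32)

    def observation(self, observation):
        return np.transpose(observation, (2, 0, 1))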
noise_sigma = 0.2
noise = OUNoise(mu=np.zeros(action_dim), sigma=noise_sigma)

# Load VAE
image_dimensions = 3 * 160 * 120
feature_dimensions = 1000
encoding_dimensions = 40
vae = VAE(image_dimensions, feature_dimensions, encoding_dimensions, 'selu')

if use_pr:
    replay_buffer = utils.PrioritizedReplayBuffer(args.replay_buffer_max_size)
else:
    replay_buffer = utils.ReplayBuffer(args.replay_buffer_max_size)

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, replay_buffer, net_type="vae", vae=vae)

# Evaluate untrained policy
evaluations = [evaluate_policy(env, policy)]
exp.metric("rewards", evaluations[0])

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
episode_reward = None
env_counter = 0

while total_timesteps < args.max_timesteps:
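# --- Hedged sketch: the OUNoise class used above is not shown here. The
# version below is a conventional Ornstein-Uhlenbeck process, as commonly used
# for DDPG exploration; the defaults (theta=0.15, dt=1e-2) are assumptions, not
# the actual values of the project's implementation.
import numpy as np


class OUNoise:
    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.reset()

    def reset(self):
        # Restart the process from the mean.
        self.x_prev = np.copy(self.mu)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        x = (self.x_prev
             + self.theta * (self.mu - self.x_prev) * self.dt
             + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x


# Typical use during training: perturb the deterministic action, then clip it.
# action = np.clip(policy.predict(obs) + noise(), -max_action, max_action)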
# Wrappers
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # to make the images from 160x120x3 into 3x160x120
env = ActionWrapper(env)
env = DtRewardWrapper(env)

# Set seeds
seed(args.seed)

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")

replay_buffer = utils.ReplayBuffer(args.replay_buffer_max_size)

# Evaluate untrained policy
evaluations = [evaluate_policy(env, policy)]
exp.metric("rewards", evaluations[0])

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
episode_reward = None
env_counter = 0

while total_timesteps < args.max_timesteps:
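# --- Hedged sketch: a typical evaluate_policy helper matching the call above.
# The real helper is not shown in this listing; this assumed version runs the
# deterministic policy (no exploration noise) for a fixed number of episodes
# and returns the mean episode reward. eval_episodes and max_timesteps are
# illustrative parameters.
import numpy as np


def evaluate_policy(env, policy, eval_episodes=10, max_timesteps=500):
    avg_reward = 0.0
    for _ in range(eval_episodes):
        obs = env.reset()
        done = False
        step = 0
        while not done and step < max_timesteps:
            action = policy.predict(np.array(obs))
            obs, reward, done, _ = env.step(action)
            avg_reward += reward
            step += 1
    return avg_reward / eval_episodes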
    camera_height=480,
    accept_start_angle_deg=4,
    full_transparency=True,
    distortion=True,
    randomize_maps_on_reset=True,
    draw_curve=False,
    draw_bbox=False,
    frame_skip=4,
    draw_DDPG_features=False)

state_dim = env.get_features().shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
expert = DDPG(state_dim, action_dim, max_action, net_type="dense")
expert.load("model-here", directory="../duckietown_rl/pytorch_models", for_inference=True)

# Initialize the environment
env.reset()
# Get features (state representation) for RL agent
obs = env.get_features()

EPISODES, STEPS = 20, 1000
DEBUG = False  # please notice

logger = Logger(env, log_file=f'train-{int(EPISODES * STEPS / 1000)}k.log')
start_time = time.time()
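# --- Hedged sketch: the expert rollout / logging loop this setup leads into.
# The Logger interface is an assumption (the per-step log() call and the final
# close() are illustrative, not the actual API); the loop structure simply
# follows EPISODES, STEPS and the expert/feature usage above.
for episode in range(EPISODES):
    for step in range(STEPS):
        # Query the expert policy on the dense feature representation.
        action = expert.predict(np.array(obs))
        # Step the simulator; the returned observation is the camera frame.
        observation, reward, done, info = env.step(action)
        obs = env.get_features()

        if DEBUG:
            env.render()

        # Assumed logging call: store the frame together with the expert action.
        logger.log(observation, action, reward, done, info)

        if done:
            break

    # Start the next episode from a fresh reset and refresh the features.
    env.reset()
    obs = env.get_features()

logger.close()
print(f"Finished collecting data in {time.time() - start_time:.1f}s")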