# Launch the env with our helper function (a sketch of launch_env follows this snippet)
env = launch_env()

# Wrappers
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # convert images from HxWxC (160x120x3) to CxHxW (3x160x120)
env = ActionWrapper(env)
# env = DtRewardWrapper(env) # not during testing

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")

policy.load(file_name, directory="./pytorch_models")

with torch.no_grad():
    while True:
        obs = env.reset()
        env.render()
        rewards = []
        while True:
            action = policy.predict(np.array(obs))
            obs, rew, done, misc = env.step(action)
            rewards.append(rew)
            env.render()
            if done:
                print(f"Episode finished, total reward: {sum(rewards):.2f}")
                break
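
The helper launch_env is not defined in these snippets. A minimal sketch, assuming it wraps gym_duckietown's Simulator with constructor arguments like those visible in the last example; the map name, seed, and the specific values below are illustrative assumptions, not taken from the original code.

# A minimal sketch of launch_env, assuming gym_duckietown's Simulator;
# map_name, seed, and the specific values are illustrative assumptions.
from gym_duckietown.simulator import Simulator

def launch_env(map_name="loop_empty", seed=123):
    return Simulator(
        seed=seed,
        map_name=map_name,
        max_steps=500001,            # effectively never end an episode on step count
        domain_rand=True,            # domain randomization for robustness
        camera_width=640,
        camera_height=480,
        accept_start_angle_deg=4,    # spawn roughly aligned with the lane
        full_transparency=True,
        distortion=True,
    )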
Example #2
# OUNoise: Ornstein-Uhlenbeck exploration noise (a sketch follows this snippet)
noise_sigma = 0.2
noise = OUNoise(mu=np.zeros(action_dim), sigma=noise_sigma)

# Load VAE
image_dimensions = 3 * 160 * 120
feature_dimensions = 1000
encoding_dimensions = 40
vae = VAE(image_dimensions, feature_dimensions, encoding_dimensions, 'selu')

if use_pr:
    replay_buffer = utils.PrioritizedReplayBuffer(args.replay_buffer_max_size)
else:
    replay_buffer = utils.ReplayBuffer(args.replay_buffer_max_size)

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, replay_buffer, net_type="vae", vae=vae)


# Evaluate untrained policy (a sketch of evaluate_policy follows these training snippets)
evaluations = [evaluate_policy(env, policy)]

exp.metric("rewards", evaluations[0])

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
episode_reward = None
env_counter = 0
while total_timesteps < args.max_timesteps:
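
OUNoise is referenced above but not defined in the snippet. A minimal sketch of an Ornstein-Uhlenbeck exploration-noise process as commonly paired with DDPG; the theta and dt defaults are assumptions.

# A minimal Ornstein-Uhlenbeck noise process for DDPG-style exploration;
# theta and dt defaults are assumptions, not taken from the snippet.
import numpy as np

class OUNoise:
    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2):
        self.mu, self.sigma, self.theta, self.dt = mu, sigma, theta, dt
        self.reset()

    def reset(self):
        # Restart the process at its mean
        self.x = np.copy(self.mu)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = (self.theta * (self.mu - self.x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.mu.shape))
        self.x = self.x + dx
        return self.x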
Example #3

# Wrappers
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)  # convert images from HxWxC (160x120x3) to CxHxW (3x160x120)
env = ActionWrapper(env)
env = DtRewardWrapper(env)

# Set seeds
seed(args.seed)

state_dim = env.observation_space.shape
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")

replay_buffer = utils.ReplayBuffer(args.replay_buffer_max_size)

# Evaluate untrained policy
evaluations = [evaluate_policy(env, policy)]

exp.metric("rewards", evaluations[0])

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
episode_reward = None
env_counter = 0
while total_timesteps < args.max_timesteps:
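
evaluate_policy is also not shown in these snippets. A minimal sketch, assuming it returns the mean undiscounted return over a fixed number of evaluation rollouts; the episode count and step cap are assumptions.

# A minimal sketch of evaluate_policy, assuming it averages the undiscounted
# episode return over a fixed number of evaluation rollouts.
import numpy as np

def evaluate_policy(env, policy, eval_episodes=10, max_timesteps=500):
    returns = []
    for _ in range(eval_episodes):
        obs = env.reset()
        total, done, steps = 0.0, False, 0
        while not done and steps < max_timesteps:
            action = policy.predict(np.array(obs))
            obs, reward, done, _ = env.step(action)
            total += reward
            steps += 1
        returns.append(total)
    return np.mean(returns)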
Example #4

                camera_height=480,
                accept_start_angle_deg=4,
                full_transparency=True,
                distortion=True,
                randomize_maps_on_reset=True,
                draw_curve=False,
                draw_bbox=False,
                frame_skip=4,
                draw_DDPG_features=False)

state_dim = env.get_features().shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

# Initialize policy
expert = DDPG(state_dim, action_dim, max_action, net_type="dense")
expert.load("model-here",
            directory="../duckietown_rl/pytorch_models",
            for_inference=True)

# Initialize the environment
env.reset()
# Get features (state representation) for the RL agent
obs = env.get_features()
EPISODES, STEPS = 20, 1000
DEBUG = False

# Note: the log file name reflects the planned number of steps (EPISODES * STEPS, in thousands)
logger = Logger(env, log_file=f'train-{int(EPISODES*STEPS/1000)}k.log')

start_time = time.time()
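
The data-collection loop itself is cut off above. Purely as an illustration (not the original code), an expert rollout over the configured EPISODES and STEPS could look like the following; the actual logging call is left as a comment because the Logger API is project-specific.

# Illustrative rollout sketch only; the original loop body is not shown above.
for episode in range(EPISODES):
    env.reset()
    obs = env.get_features()
    for step in range(STEPS):
        action = expert.predict(np.array(obs))   # expert acts on the dense features
        observation, reward, done, info = env.step(action)
        # logger.log(...)  # record the transition with the project's Logger here
        obs = env.get_features()
        if done:
            break
print(f"Collected {EPISODES} episodes in {time.time() - start_time:.1f} s")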