def train_model(args):
    # We are using a custom model and environment, which need to be registered in ray/rllib.
    # Names can be anything.
    register_env("DuckieTown-MultiMap", lambda _: DiscreteWrapper(MultiMapEnv()))

    # Define the trainer. Apart from env, the config/framework and config/model keys are common
    # among trainers; the rest are DQN-specific.
    # Here is a list of default config keys/values:
    # https://docs.ray.io/en/master/rllib-training.html#common-parameters
    # For DQN specifically there are additionally these keys:
    # https://docs.ray.io/en/master/rllib-algorithms.html#dqn
    trainer = DQNTrainer(
        env="DuckieTown-MultiMap",
        config={
            "framework": "torch",
            "model": {
                "custom_model": "image-dqn",
            },
            "learning_starts": 500,
            # Enabling this allows us to record images from the DuckieTown Gym! Might be useful for the report.
            # "record_env": True,
            "train_batch_size": 16,
            # Use a very small buffer to reduce memory usage, default: 50_000.
            "buffer_size": 1000,
            # Dueling off.
            "dueling": False,
            # No hidden layers.
            "hiddens": [],
            # Don't save experiences.
            # "output": None,
            # "compress_observations": True,
            "num_workers": 0,
            "num_gpus": 0.5,
            "rollout_fragment_length": 50,
        })

    # Start training from a checkpoint, if available.
    if args.model_path:
        trainer.restore(args.model_path)

    plot = plotter.Plotter('dqn_agent')
    for i in range(args.epochs):  # Number of training iterations (basically epochs).
        print(f'----------------------- Starting epoch {i} -----------------------')
        # train() runs a single training iteration.
        result = trainer.train()
        print(result)
        plot.add_results(result)

        # Save the model so far.
        checkpoint_path = trainer.save()
        print(f'Epoch {i}, checkpoint saved at: {checkpoint_path}')

        # Clean up CUDA memory to reduce memory usage.
        torch.cuda.empty_cache()
        # Debug log to monitor memory.
        print(torch.cuda.memory_summary(device=None, abbreviated=False))

    plot.plot('DQN DuckieTown-MultiMap')
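# DiscreteWrapper is used throughout but defined elsewhere. Below is a minimal sketch of what
# such a wrapper could look like, assuming it is a gym.ActionWrapper that maps a small discrete
# action set onto Duckietown's continuous (left, right) wheel velocities. The action set and
# velocity values here are illustrative assumptions, not the project's actual mapping.
import gym
import numpy as np
from gym import spaces


class DiscreteWrapperSketch(gym.ActionWrapper):
    """Sketch only: expose a Discrete action space over wheel-velocity pairs."""

    # Hypothetical mapping from discrete index to (left, right) wheel velocities.
    ACTIONS = [
        np.array([0.6, 0.6]),  # 0: forward
        np.array([0.2, 0.6]),  # 1: steer left
        np.array([0.6, 0.2]),  # 2: steer right
    ]

    def __init__(self, env):
        super().__init__(env)
        self.action_space = spaces.Discrete(len(self.ACTIONS))

    def action(self, act):
        # Translate the discrete index into the underlying continuous action.
        return self.ACTIONS[int(act)]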
def main():
    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    environment = DiscreteWrapper(environment)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Next, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = DuckieTownProcessor()

    # Select a policy. We use eps-greedy action selection, which means that a random action is
    # selected with probability eps. We anneal eps from 1.0 to 0.1 over the course of 400k steps.
    # This is done so that the agent initially explores the environment (high eps) and then
    # gradually sticks to what it knows (low eps). We also set a dedicated eps value that is used
    # during testing. Note that we set it to 0.05 so that the agent still performs some random
    # actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        # nb_steps=1000000
        nb_steps=400000)
    # The trade-off between exploration and exploitation is difficult and an ongoing research
    # topic. If you want, you can experiment with the parameters or use a different policy.
    # Another popular one is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=50000,
                   gamma=.99,
                   target_model_update=10000,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    callbacks = []
    dqn.fit(
        environment,
        callbacks=callbacks,
        # nb_steps=1750000,
        nb_steps=500000,
        log_interval=10000,
        visualize=True)
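# build_model is defined elsewhere in the project. A plausible sketch is below, assuming the
# classic Atari DQN convolutional architecture that keras-rl's examples use; the exact layer
# sizes are an assumption. input_shape is (WINDOW_LENGTH,) + INPUT_SHAPE as passed above.
from keras.layers import Conv2D, Dense, Flatten, Permute
from keras.models import Sequential


def build_model_sketch(input_shape, nb_actions):
    model = Sequential()
    # keras-rl delivers stacked frames channel-first: (WINDOW_LENGTH, H, W) -> (H, W, WINDOW_LENGTH).
    model.add(Permute((2, 3, 1), input_shape=input_shape))
    model.add(Conv2D(32, (8, 8), strides=4, activation='relu'))
    model.add(Conv2D(64, (4, 4), strides=2, activation='relu'))
    model.add(Conv2D(64, (3, 3), strides=1, activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))  # One Q-value per discrete action.
    return model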
def get_env():
    # The Simulator env uses a single map, so it is better suited for evaluation/testing.
    # DiscreteWrapper just converts high-level discrete actions to wheel velocities.
    return DiscreteWrapper(simulator.Simulator(
        map_name=args.map,
        max_steps=2000,
    ))
def creator():
    # Build the environment from explicit arguments, or look it up by name if one was given.
    if args.env_name is None:
        env = DuckietownEnv(
            map_name=args.map_name,
            draw_curve=args.draw_curve,
            draw_bbox=args.draw_bbox,
            domain_rand=args.domain_rand,
            frame_skip=args.frame_skip,
            distortion=args.distortion,
        )
    else:
        env = gym.make(args.env_name)

    # `args` and `discrete` are captured from the enclosing scope.
    return DiscreteWrapper(env) if discrete else env
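# For reference, a factory like creator() is typically handed to RLlib so the env can be built
# by name inside workers. The registration name below is a hypothetical example.
register_env("DuckieTown-Custom", lambda _: creator())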
def train_model(args):
    # We are using a custom model and environment, which need to be registered in ray/rllib.
    # Names can be anything.
    register_env("DuckieTown-MultiMap", lambda _: DiscreteWrapper(MultiMapEnv()))

    # Define the trainer. Apart from env, the config/framework and config/model keys are common
    # among trainers; the rest are PPO-specific.
    trainer = PPOTrainer(
        env="DuckieTown-MultiMap",
        config={
            "framework": "torch",
            "model": {
                "custom_model": "image-ppo",
            },
            "sgd_minibatch_size": 64,
            "output": None,
            "compress_observations": True,
            "num_workers": 0,
        })

    # Start training from a checkpoint, if available.
    if args.model_path:
        trainer.restore(args.model_path)

    plot = plotter.Plotter('ppo_agent')
    for i in range(args.epochs):  # Number of training iterations (basically epochs).
        print(f'----------------------- Starting epoch {i} -----------------------')
        # train() runs a single training iteration.
        result = trainer.train()
        print(result)
        plot.add_results(result)

        # Save the model so far.
        checkpoint_path = trainer.save()
        print(f'Epoch {i}, checkpoint saved at: {checkpoint_path}')

        # Clean up CUDA memory to reduce memory usage.
        torch.cuda.empty_cache()
        # Debug log to monitor memory.
        print(torch.cuda.memory_summary(device=None, abbreviated=False))

    plot.plot('PPO DuckieTown-MultiMap')
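# Once a checkpoint exists, the trained policy can be rolled out for a quick qualitative check.
# A minimal sketch, assuming the old RLlib Trainer API (trainer.compute_action) and the same env
# construction as above; the episode count is arbitrary.
def evaluate_sketch(trainer, n_episodes=5):
    env = DiscreteWrapper(MultiMapEnv())
    for ep in range(n_episodes):
        obs, done, total_reward = env.reset(), False, 0.0
        while not done:
            # Greedy action from the trained policy (no exploration noise).
            action = trainer.compute_action(obs, explore=False)
            obs, reward, done, _ = env.step(action)
            total_reward += reward
        print(f'Episode {ep}: reward={total_reward:.2f}')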
def main():
    weights_filename = "dqn_Duckietown-4way-v0_weights.h5f"
    # Get the environment and extract the number of actions. The environment name is embedded in
    # the weights filename; join the middle parts so names containing underscores
    # (e.g. "Duckietown-straight_road-v0") survive the split.
    environment_name = "_".join(weights_filename.split("_")[1:-1])
    environment = gym.make(environment_name)
    environment = DiscreteWrapper(environment)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create memory.
    memory = SequentialMemory(
        limit=1000000, window_length=WINDOW_LENGTH)  # TODO Why is this necessary?

    # Create the processor.
    processor = DuckieTownProcessor()

    # Create the DQN agent.
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        processor=processor,
    )
    dqn.target_model = dqn.model  # TODO Why is this necessary?
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])  # TODO Why is this necessary?

    # Load the weights.
    dqn.load_weights(weights_filename)

    # Test the agent.
    dqn.test(environment, nb_episodes=10, visualize=True)
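# DuckieTownProcessor is defined elsewhere. A plausible sketch, assuming it performs the standard
# Atari-style preprocessing (resize + grayscale, compact uint8 storage, reward clipping) via
# keras-rl's Processor interface; the exact steps are assumptions.
from PIL import Image
from rl.core import Processor


class DuckieTownProcessorSketch(Processor):
    def process_observation(self, observation):
        # Resize the camera image and convert to grayscale; store as uint8 to keep replay memory small.
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img, dtype='uint8')

    def process_state_batch(self, batch):
        # Rescale to [0, 1] only at training time instead of storing floats in memory.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        # Clip rewards to stabilize Q-learning, as in the DQN paper.
        return np.clip(reward, -1., 1.)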
        help='Location of pre-trained model')
    return parser.parse_args()


if __name__ == '__main__':
    args = get_parser()
    # Start ray.
    ray.init()

    ModelCatalog.register_custom_model("image-dqn", RLLibDQNCritic)
    # We are using a custom model and environment, which need to be registered in ray/rllib.
    # Names can be anything.
    register_env("DuckieTown-MultiMap", lambda _: DiscreteWrapper(MultiMapEnv()))

    # Resume numbering from any results that were already written in a previous run.
    csv_path = "searches/dqn_results.csv"
    starting_idx = 0
    if os.path.exists(csv_path):
        with open(csv_path, mode="r") as f:
            starting_idx = len(f.readlines())

    for search_idx in trange(args.n_searches, desc="Searches"):
        config = {
            "framework": "torch",
            "model": {
                "custom_model": "image-dqn",
            },
            "learning_starts": 500,
            # "record_env": True,  # Enabling this allows us to record images from the DuckieTown Gym! Might be useful for the report.  # noqa: E501
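# The search loop continues beyond this excerpt. For context, a hedged sketch of what one
# iteration presumably does: sample hyperparameters, train, and append the outcome to the CSV.
# The sampled keys, ranges, and CSV columns below are illustrative assumptions.
import csv
import random

config["train_batch_size"] = random.choice([16, 32, 64])
config["lr"] = 10 ** random.uniform(-5, -3)

trainer = DQNTrainer(env="DuckieTown-MultiMap", config=config)
result = trainer.train()

with open(csv_path, mode="a") as f:
    writer = csv.writer(f)
    writer.writerow([
        starting_idx + search_idx,
        config["train_batch_size"],
        config["lr"],
        result["episode_reward_mean"],  # Standard RLlib result key.
    ])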