# Evaluation entry point: loads a (possibly trained) policy and runs the full
# test suite with Explorer.run_k_episodes.
import argparse
import configparser
import logging
import os

import gym
import torch

# Repo-local imports are assumed here: policy_factory, Robot, Human, Explorer,
# and ORCA must be importable from this repo (exact module paths omitted).


def main():
    parser = argparse.ArgumentParser(description='Parse configuration file')
    parser.add_argument('--env_config', type=str, default='data/output/env.config')
    parser.add_argument('--policy_config', type=str, default='data/output/policy.config')
    parser.add_argument('--policy', type=str, default='scr')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--plot_file', type=str, default=None)
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        elif os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
            model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
        else:
            model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        # fall back to the config files given on the command line
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy requires a model weights directory (--model_dir)')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env = gym.make('CrowdSim-v0')
    env.configure(env_config_file)
    robot = Robot()
    robot.configure(env_config_file, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    humans = [Human() for _ in range(env.human_num)]
    for human in humans:
        human.configure(env_config_file, 'humans')
    env.set_humans(humans)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    explorer = Explorer(env, robot, device, gamma=0.9)
    policy.set_phase(args.phase)
    policy.set_device(device)

    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # an invisible robot breaks ORCA's reciprocity assumption;
            # a positive buffer can be tuned here if needed
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()
    explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)


if __name__ == '__main__':
    main()
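# A minimal usage sketch for the evaluation entry point above, assuming it is
# saved as test.py and that data/output holds a finished training run (both
# the filename and the directory are assumptions, not fixed by this repo):
#
#   python test.py --policy scr --model_dir data/output --phase test
#   python test.py --policy scr --model_dir data/output --phase test --square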
# Training entry point: imitation-learning warm start, then deep RL with an
# epsilon-greedy exploration schedule, periodic evaluation, and checkpoints.
import argparse
import configparser
import logging
import os
import shutil
import sys

import git
import gym
import torch

# Repo-local imports are assumed here: policy_factory, Robot, Human, Explorer,
# Trainer, and ReplayMemory must be importable from this repo.


def main():
    parser = argparse.ArgumentParser(description='Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy', type=str, default='scr')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--train_config', type=str, default='configs/train.config')
    parser.add_argument('--output_dir', type=str, default='data/output')
    parser.add_argument('--weights', type=str)
    parser.add_argument('--resume', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    args = parser.parse_args()

    # configure paths
    make_new_dir = True
    if os.path.exists(args.output_dir):
        key = input('Output directory already exists! Overwrite the folder? (y/n)')
        if key == 'y' and not args.resume:
            shutil.rmtree(args.output_dir)
        else:
            make_new_dir = False
            args.env_config = os.path.join(args.output_dir, os.path.basename(args.env_config))
            args.policy_config = os.path.join(args.output_dir, os.path.basename(args.policy_config))
            args.train_config = os.path.join(args.output_dir, os.path.basename(args.train_config))
    if make_new_dir:
        os.makedirs(args.output_dir)
        shutil.copy(args.env_config, args.output_dir)
        shutil.copy(args.policy_config, args.output_dir)
        shutil.copy(args.train_config, args.output_dir)
    log_file = os.path.join(args.output_dir, 'output.log')
    il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
    rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')

    # configure logging
    mode = 'a' if args.resume else 'w'
    file_handler = logging.FileHandler(log_file, mode=mode)
    stdout_handler = logging.StreamHandler(sys.stdout)
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level, handlers=[stdout_handler, file_handler],
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    repo = git.Repo(search_parent_directories=True)
    logging.info('Current git head hash code: %s', repo.head.object.hexsha)
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    if not policy.trainable:
        parser.error('Policy has to be trainable')
    if args.policy_config is None:
        parser.error('Policy config has to be specified for a trainable network')
    policy_config = configparser.RawConfigParser()
    policy_config.read(args.policy_config)
    policy.configure(policy_config)
    policy.set_device(device)

    # configure environment
    env = gym.make('CrowdSim-v0')
    env.configure(args.env_config)
    robot = Robot()
    robot.configure(args.env_config, 'robot')
    env.set_robot(robot)
    humans = [Human() for _ in range(env.human_num)]
    for human in humans:
        human.configure(args.env_config, 'humans')
    env.set_humans(humans)

    # read training parameters
    if args.train_config is None:
        parser.error('Train config has to be specified for a trainable network')
    train_config = configparser.RawConfigParser()
    train_config.read(args.train_config)
    rl_learning_rate = train_config.getfloat('train', 'rl_learning_rate')
    train_batches = train_config.getint('train', 'train_batches')
    train_episodes = train_config.getint('train', 'train_episodes')
    sample_episodes = train_config.getint('train', 'sample_episodes')
    target_update_interval = train_config.getint('train', 'target_update_interval')
    evaluation_interval = train_config.getint('train', 'evaluation_interval')
    capacity = train_config.getint('train', 'capacity')
    epsilon_start = train_config.getfloat('train', 'epsilon_start')
    epsilon_end = train_config.getfloat('train', 'epsilon_end')
    epsilon_decay = train_config.getfloat('train', 'epsilon_decay')
    checkpoint_interval = train_config.getint('train', 'checkpoint_interval')

    # configure trainer and explorer
    memory = ReplayMemory(capacity)
    model = policy.get_model()
    batch_size = train_config.getint('trainer', 'batch_size')
    trainer = Trainer(policy, memory, device, batch_size)
    explorer = Explorer(env, robot, device, memory, policy.gamma, target_policy=policy)

    # imitation learning
    if args.resume:
        if not os.path.exists(rl_weight_file):
            logging.error('RL weight file does not exist')
        model.load_state_dict(torch.load(rl_weight_file))
        rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
        logging.info('Loaded reinforcement learning weights; resuming training.')
    elif os.path.exists(il_weight_file):
        model.load_state_dict(torch.load(il_weight_file))
        logging.info('Loaded imitation learning weights.')
    else:
        il_episodes = train_config.getint('imitation_learning', 'il_episodes')
        il_policy = train_config.get('imitation_learning', 'il_policy')
        il_epochs = train_config.getint('imitation_learning', 'il_epochs')
        il_learning_rate = train_config.getfloat('imitation_learning', 'il_learning_rate')
        trainer.set_learning_rate(il_learning_rate)
        if robot.visible:
            safety_space = 0
        else:
            safety_space = train_config.getfloat('imitation_learning', 'safety_space')
        il_policy = policy_factory[il_policy]()
        il_policy.multiagent_training = policy.multiagent_training
        il_policy.safety_space = safety_space
        robot.set_policy(il_policy)
        explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True)
        trainer.optimize_epoch(il_epochs)
        torch.save(model.state_dict(), il_weight_file)
        logging.info('Finished imitation learning. Weights saved.')
        logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
    explorer.update_target_model(model)

    # reinforcement learning
    policy.set_env(env)
    robot.set_policy(policy)
    robot.print_info()
    trainer.set_learning_rate(rl_learning_rate)
    # fill the memory pool with some RL experience
    if args.resume:
        robot.policy.set_epsilon(epsilon_end)
        explorer.run_episode('val', video_file=f'data/output/video_e{-1}.mp4')
        explorer.run_k_episodes(100, 'train', update_memory=True, episode=0)
        logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
    episode = 0
    while episode < train_episodes:
        if args.resume:
            epsilon = epsilon_end
        elif episode < epsilon_decay:
            epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
        else:
            epsilon = epsilon_end
        robot.policy.set_epsilon(epsilon)

        # sample k episodes into memory and optimize over the generated memory
        explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode)
        trainer.optimize_batch(train_batches)

        # evaluate the model
        if episode % evaluation_interval == 0:
            explorer.run_episode('val', video_file=f'data/output/video_e{episode}.mp4')

        episode += 1
        if episode % target_update_interval == 0:
            explorer.update_target_model(model)
        if episode != 0 and episode % checkpoint_interval == 0:
            torch.save(model.state_dict(), rl_weight_file)

    # final test
    explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode)


if __name__ == '__main__':
    main()
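# The schedule in the training loop above decays epsilon linearly from
# epsilon_start to epsilon_end over the first epsilon_decay episodes and holds
# it at epsilon_end afterwards. A minimal standalone sketch of the same
# schedule; the function name is an assumption introduced only for
# illustration:
def linear_epsilon(episode, epsilon_start, epsilon_end, epsilon_decay):
    """Linearly anneal epsilon over `epsilon_decay` episodes, then hold."""
    if episode < epsilon_decay:
        return epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
    return epsilon_end

# e.g. with epsilon_start=0.5, epsilon_end=0.1, epsilon_decay=4000:
#   linear_epsilon(0, 0.5, 0.1, 4000)    -> 0.5
#   linear_epsilon(2000, 0.5, 0.1, 4000) -> 0.3
#   linear_epsilon(4000, 0.5, 0.1, 4000) -> 0.1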
# Visualization entry point: rolls out a single episode with an optional live
# render, per-step speed logging, and optional video/plot export.
import argparse
import configparser
import logging
import os

import gym
import matplotlib.pyplot as plt
import numpy as np
import torch

# Repo-local imports are assumed here: policy_factory, Robot, Human, Explorer,
# ORCA, Plotter, Video, and notify must be importable from this repo.


def main():
    parser = argparse.ArgumentParser(description='Parse configuration file')
    parser.add_argument('--env_config', type=str, default='data/output/env.config')
    parser.add_argument('--policy_config', type=str, default='data/output/policy.config')
    parser.add_argument('--policy', type=str, default='scr')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--plot_file', type=str, default=None)
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        elif os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
            model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
        else:
            model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        # fall back to the config files given on the command line
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy requires a model weights directory (--model_dir)')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env = gym.make('CrowdSim-v0')
    env.configure(env_config_file)
    robot = Robot()
    robot.configure(env_config_file, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    humans = [Human() for _ in range(env.human_num)]
    for human in humans:
        human.configure(env_config_file, 'humans')
    env.set_humans(humans)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    explorer = Explorer(env, robot, device, gamma=0.9)
    policy.set_phase(args.phase)
    policy.set_device(device)

    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # an invisible robot breaks ORCA's reciprocity assumption;
            # a positive buffer can be tuned here if needed
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    ob = env.reset(args.phase, args.test_case)
    done = False
    last_pos = np.array(robot.get_position())

    # subscribers are notified with the environment state after every step
    observation_subscribers = []
    if args.plot_file:
        plotter = Plotter(args.plot_file)
        observation_subscribers.append(plotter)
    if args.video_file:
        video = Video(args.video_file)
        observation_subscribers.append(video)

    t = 0
    while not done:
        action = robot.act(ob)
        ob, _, done, info = env.step(action)
        notify(observation_subscribers, env.state)
        if args.visualize:
            env.render()
            plt.pause(.001)
        current_pos = np.array(robot.get_position())
        logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
        last_pos = current_pos
        t += 1

    if args.plot_file:
        plotter.save()
    if args.video_file:
        video.make([robot.policy.draw_attention, robot.policy.draw_observation])

    logging.info('It takes %.2f seconds to finish. Final status is %s', env.global_time, info)
    if robot.visible and info == 'reach goal':
        human_times = env.get_human_times()
        logging.info('Average time for humans to reach goal: %.2f',
                     sum(human_times) / len(human_times))


if __name__ == '__main__':
    main()
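# A minimal usage sketch for the visualization entry point above, assuming it
# is saved as visualize.py (the filename and output paths are assumptions):
# render one test case live, or export a video without rendering:
#
#   python visualize.py --policy scr --model_dir data/output --phase test \
#       --test_case 0 --visualize
#   python visualize.py --policy scr --model_dir data/output --phase test \
#       --test_case 0 --video_file data/output/case0.mp4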