parser.add_argument('--output', help='output directory for submission', default='output_dir')
parser.add_argument('--episode', help='number of episodes to eval', default=100, type=int)
args = parser.parse_args()

ENV_NAME = args.env
logger.info("Environment Name: {}".format(ENV_NAME))
NUM_ACTIONS = get_player().action_space.n
logger.info("Number of actions: {}".format(NUM_ACTIONS))

if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state'],
        output_names=['policy']))
    if args.task == 'play':
        play_n_episodes(get_player(train=False), pred, args.episode, render=True)
    elif args.task == 'eval':
        eval_model_multithread(pred, args.episode, get_player)
    elif args.task == 'dump_video':
        play_n_episodes(
            get_player(train=False, dumpdir=args.output),
            pred, args.episode)
else:
    train()
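# For reference, a minimal sketch of the play/eval helper these entry points
# rely on. This is an assumption modeled on the common.py helper shipped with
# the tensorpack examples, not the verbatim implementation; the names
# play_one_episode and play_n_episodes_sketch are illustrative.
import random

def play_one_episode(env, predfunc, render=False):
    # Roll out one episode: pick the argmax action from the predictor output
    # (with a tiny epsilon of random exploration) and accumulate the reward.
    ob = env.reset()
    sum_r = 0.0
    while True:
        act = predfunc(ob[None, ...])[0][0].argmax()
        if random.random() < 0.001:
            act = env.action_space.sample()
        ob, r, is_over, _ = env.step(act)
        if render:
            env.render()
        sum_r += r
        if is_over:
            return sum_r

def play_n_episodes_sketch(player, predfunc, nr, render=False):
    # Hypothetical stand-in for play_n_episodes: run `nr` episodes, log scores.
    for k in range(nr):
        score = play_one_episode(player, predfunc, render=render)
        logger.info("{}/{}, score={}".format(k + 1, nr, score))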
NUM_ACTIONS = get_player().action_space.n
logger.info("Number of actions: {}".format(NUM_ACTIONS))

if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(args.load),
                      input_names=['state'],
                      output_names=['policy']))
    if args.task == 'play':
        play_n_episodes(get_player(train=False), pred, args.episode, render=True)
    elif args.task == 'eval':
        eval_model_multithread(pred, args.episode, get_player)
    elif args.task == 'gen_submit':
        play_n_episodes(get_player(train=False, dumpdir=args.output),
                        pred, args.episode)
        # gym.upload(args.output, api_key='xxx')
else:
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
    trainer = SimpleTrainer(
ENV_NAME = args.env
USE_GYM = not ENV_NAME.endswith('.bin')

# set num_actions
num_actions = get_player().action_space.n
logger.info("ENV: {}, Num Actions: {}".format(args.env, num_actions))

state_shape = IMAGE_SIZE + (3, ) if USE_GYM else IMAGE_SIZE
model = Model(state_shape, FRAME_HISTORY, args.algo, num_actions)

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=model,
                      session_init=SmartInit(args.load),
                      input_names=['state'],
                      output_names=['Qvalue']))
    if args.task == 'play':
        play_n_episodes(get_player(viz=0.01), pred, 100, render=True)
    elif args.task == 'eval':
        eval_model_multithread(pred, args.num_eval, get_player)
else:
    logger.set_logger_dir(
        os.path.join(
            'train_log',
            'DQN-{}'.format(os.path.basename(args.env).split('.')[0])))
    config = get_config(model)
    config.session_init = SmartInit(args.load)
    launch_train_with_config(config, SimpleTrainer())
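# Sketch of how state_shape and FRAME_HISTORY plausibly combine in the model's
# input spec: history frames stacked on a trailing axis. make_state_spec is a
# hypothetical helper (not in the source), shown only to make the shapes concrete.
def make_state_spec(state_shape, frame_history):
    import tensorflow as tf
    # e.g. make_state_spec((84, 84, 3), 4) ->
    #   TensorSpec(shape=(None, 84, 84, 3, 4), dtype=tf.uint8, name='state')
    return tf.TensorSpec((None,) + tuple(state_shape) + (frame_history,),
                         tf.uint8, 'state')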
        session_init=get_model_loader(args.load),
        input_names=state_names,
        output_names=qvalue_names,
    )
)

# demo pretrained model one episode at a time
if args.task == "play":
    play_n_episodes(
        get_player(
            files_list=args.files,
            viz=0,
            saveGif=args.saveGif,
            saveVideo=args.saveVideo,
            task="play",
            agents=args.agents,
            fiducials=args.fiducials,
            infDir=args.inferDir,
        ),
        pred,
        num_files,
        agents=args.agents,
        fidname=[fidNumToName[i] for i in args.fiducials],
        infDir=args.inferDir,
    )
# run episodes in parallel and evaluate pretrained model
elif args.task == "eval":
    play_n_episodes(
        get_player(
            files_list=args.files,
            viz=0,
            saveGif=args.saveGif,
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    assert args.load is not None
    cfg = PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state'],
        output_names=['policy'])
    if args.task == 'play':
        play_model(cfg, get_player(viz=0.01))
    elif args.task == 'eval':
        eval_model_multithread(cfg, args.episode, get_player)
    elif args.task == 'gen_submit':
        play_n_episodes(get_player(train=False, dumpdir=args.output),
                        OfflinePredictor(cfg), args.episode)
        # gym.upload(output, api_key='xxx')
else:
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    nr_gpu = get_nr_gpu()
    trainer = QueueInputTrainer
    if nr_gpu > 0:
        if nr_gpu > 1:
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info(
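# Worked example of the GPU split above (values illustrative, not from the
# source): the later half of the GPUs serves prediction, the earlier half
# trains, and a single GPU does both. Note -nr_gpu // 2 floor-divides:
#   nr_gpu = 4 -> train_tower = [0, 1], predict_tower = [2, 3]
#   nr_gpu = 3 -> train_tower = [0],    predict_tower = [1, 2]
#   nr_gpu = 1 -> train_tower = [0],    predict_tower = [0]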
NUM_ACTIONS = get_player().action_space.n
logger.info("Environment: {}, number of actions: {}".format(
    ENV_NAME, NUM_ACTIONS))

if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=SmartInit(args.load),
                      input_names=['state'],
                      output_names=['policy']))
    if args.task == 'play':
        filename = "/mnt/research/judy/reward_shaping/expert_data/batch_{}.npz".format(
            args.save_id)
        play_n_episodes(get_player(train=False), pred, args.episode,
                        render=args.render, save=args.save, filename=filename)
    elif args.task == 'eval':
        eval_model_multithread(pred, args.episode, get_player)
    elif args.task == 'dump_video':
        play_n_episodes(get_player(train=False, dumpdir=args.output),
                        pred, args.episode)
else:
    train()
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

ENV_NAME = args.env
# USE_GYM = not ENV_NAME.endswith('.bin')
IMAGE_CHANNEL = 2  # was `2 if USE_GYM else 2`: both branches were 2, and USE_GYM is unset
METHOD = args.algo

# set num_actions
NUM_ACTIONS = get_player().action_space.n
logger.info("ENV: {}, Num Actions: {}".format(ENV_NAME, NUM_ACTIONS))

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(args.load),
                      input_names=['state'],
                      output_names=['Qvalue']))
    if args.task == 'play':
        play_n_episodes(get_player(viz=0.01), pred, 100)
    elif args.task == 'eval':
        eval_model_multithread(pred, EVAL_EPISODE, get_player)
else:
    logger.set_logger_dir(
        os.path.join(
            'train_log',
            'DQN-{}'.format(os.path.basename(ENV_NAME).split('.')[0])))
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
    launch_train_with_config(config, SimpleTrainer())
                         agents=args.agents,
                         reward_strategy=args.reward_strategy)
NUM_ACTIONS = init_player.action_space.n
num_files = init_player.files.num_files

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(args.load),
                      input_names=['state'],
                      output_names=['Qvalue']))
    # demo pretrained model one episode at a time
    if args.task == 'play' or args.task == 'eval':
        play_n_episodes(
            get_player(files_list=args.files,
                       viz=0.01,
                       saveGif=args.saveGif,
                       saveVideo=args.saveVideo,
                       task=args.task,
                       agents=args.agents,
                       reward_strategy=args.reward_strategy),
            pred, num_files)
else:
    # train model
    logger_dir = os.path.join(args.logDir, args.name)
    logger.set_logger_dir(logger_dir)
    config = get_config(args.files)
    if args.load:
        # resume training from a saved checkpoint
        config.session_init = get_model_loader(args.load)
    launch_train_with_config(config, SimpleTrainer())
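# Hypothetical invocation of this landmark-detection variant, assembled from
# the flags it parses above (file names and checkpoint path invented for
# illustration):
#   python DQN.py --task play --load pretrained_model.data \
#       --files image_paths.txt landmark_paths.txt --agents 1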
                    choices=['DQN', 'Double', 'Dueling'], default='Double')
args = parser.parse_args()

if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

ROM_FILE = args.rom
METHOD = args.algo

# set num_actions
NUM_ACTIONS = AtariPlayer(ROM_FILE).action_space.n
logger.info("ROM: {}, Num Actions: {}".format(ROM_FILE, NUM_ACTIONS))

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state'],
        output_names=['Qvalue']))
    if args.task == 'play':
        play_n_episodes(get_player(viz=0.01), pred, 100)
    elif args.task == 'eval':
        eval_model_multithread(pred, EVAL_EPISODE, get_player)
else:
    logger.set_logger_dir(
        os.path.join('train_log', 'DQN-{}'.format(
            os.path.basename(ROM_FILE).split('.')[0])))
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
    launch_train_with_config(config, SimpleTrainer())
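# Example invocations, assumed from the flags this variant parses (the rom
# and checkpoint names are illustrative):
#   python DQN.py --rom breakout.bin --algo Dueling                 # train
#   python DQN.py --rom breakout.bin --task play --load model.npz   # watch it play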
NUM_ACTIONS = init_player.action_space.n
num_files = init_player.files.num_files

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(args.load),
                      input_names=['state'],
                      output_names=['Qvalue']))
    # demo pretrained model one episode at a time
    if args.task == 'play':
        play_n_episodes(
            get_player(files_list=args.files,
                       viz=0.01,
                       saveGif=args.saveGif,
                       saveVideo=args.saveVideo,
                       task='play'),
            pred, num_files)
    # run episodes in parallel and evaluate pretrained model
    elif args.task == 'eval':
        play_n_episodes(
            get_player(files_list=args.files,
                       viz=0.01,
                       saveGif=args.saveGif,
                       saveVideo=args.saveVideo,
                       task='eval'),
            pred, num_files)
else:
    # train model
    logger_dir = os.path.join(args.logDir, args.name)
    logger.set_logger_dir(logger_dir)
    config = get_config(args.files)
pred = OfflinePredictor(
    PredictConfig(
        model=Model(agents=args.agents),
        session_init=get_model_loader(args.load),
        input_names=state_names,
        output_names=qvalue_names,
    ))

# demo pretrained model one episode at a time
if args.task == "play":
    play_n_episodes(
        get_player(
            files_list=args.files,
            viz=0.01,
            saveGif=args.saveGif,
            saveVideo=args.saveVideo,
            task="play",
            agents=args.agents,
        ),
        pred,
        num_files,
        agents=args.agents,
    )
# run episodes in parallel and evaluate pretrained model
elif args.task == "eval":
    play_n_episodes(
        get_player(
            files_list=args.files,
            viz=0,
            saveGif=args.saveGif,
            saveVideo=args.saveVideo,
            task="eval",
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    assert args.load is not None
    cfg = PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state'],
        output_names=['policy'])
    if args.task == 'play':
        play_model(cfg, get_player(connection=(args.ip, args.port), viz=0.01))
        # pl = get_player(connection=(args.ip, args.port))
        # pl.action(0)
        # print(pl.current_state())
    elif args.task == 'eval':
        eval_model_multithread(cfg, args.episode, get_player)
    elif args.task == 'gen_submit':
        play_n_episodes(
            get_player(connection=(args.ip, args.port),
                       train=False, dumpdir=args.output),
            OfflinePredictor(cfg), args.episode)
        # gym.upload(output, api_key='xxx')
else:
    dirname = os.path.join('train_log', 'train-unity3d-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
    trainer = QueueInputTrainer if config.nr_tower == 1 else AsyncMultiGPUTrainer
    trainer(config).train()
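# Note: `trainer(config).train()` is the older tensorpack training API; the
# sibling variants above use the newer entry point instead, roughly (sketch,
# assuming a single-tower setup):
#   launch_train_with_config(config, SimpleTrainer())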
# IMAGE_CHANNEL = 3 if USE_GYM else 1
# IMAGE_CHANNEL = 5
METHOD = args.algo

# set num_actions
# NUM_ACTIONS = get_player().action_space.n
raw_list, lbl_list = init_data()
NUM_ACTIONS = np.prod(get_player().agent_out_shape)
logger.info("ENV: {}, Num Actions: {}".format(ENV_NAME, NUM_ACTIONS))

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(args.load),
                      input_names=['state'],
                      output_names=['Qvalue']))
    if args.task == 'play':
        # play_n_episodes(get_player(viz=0.01), pred, 100)
        play_n_episodes(get_player(), pred, 100)
    elif args.task == 'eval':
        eval_model_multithread(pred, EVAL_EPISODE, get_player)
else:
    logger.set_logger_dir(
        os.path.join(
            'train_log',
            METHOD + '-DQN-{}'.format(os.path.basename(ENV_NAME).split('.')[0])))
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
    launch_train_with_config(config, SimpleTrainer())
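# Worked example of the action-count computation above (the shape is invented
# for illustration): if get_player().agent_out_shape were (3, 3, 3), then
# NUM_ACTIONS = np.prod((3, 3, 3)) = 27, i.e. one discrete action per cell of
# the agent's output grid.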
NUM_ACTIONS = init_player.action_space.n
num_files = init_player.files.num_files

if args.task != 'train':
    assert args.load is not None
    pred = OfflinePredictor(
        PredictConfig(model=Model(),
                      session_init=get_model_loader(args.load),
                      input_names=['state'],
                      output_names=['Qvalue']))
    # demo pretrained model one episode at a time
    if args.task == 'play':
        play_n_episodes(
            get_player(directory=data_dir,
                       files_list=test_list,
                       viz=0.01,
                       saveGif=args.saveGif,
                       saveVideo=args.saveVideo,
                       task='play'),
            pred, num_files)
    # run episodes in parallel and evaluate pretrained model
    elif args.task == 'eval':
        play_n_episodes(
            get_player(directory=data_dir,
                       files_list=eval_list,
                       viz=0.01,
                       saveGif=args.saveGif,
                       saveVideo=args.saveVideo,
                       task='eval'),
            pred, num_files)
else:
    # train model
    logger.set_logger_dir(logger_dir)
    config = get_config()