choices=['DQN', 'Double', 'Dueling'], default='Double') args = parser.parse_args() if args.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu if args.task != 'train': assert args.load is not None ROM_FILE = args.rom METHOD = args.algo # set num_actions pl = AtariPlayer(ROM_FILE, viz=False) NUM_ACTIONS = pl.get_action_space().num_actions() del pl if args.task != 'train': cfg = PredictConfig(model=Model(), session_init=get_model_loader(args.load), input_names=['state'], output_names=['Qvalue']) if args.task == 'play': play_model(cfg, get_player(viz=0.01)) elif args.task == 'eval': eval_model_multithread(cfg, EVAL_EPISODE, get_player) else: config = get_config() if args.load: config.session_init = SaverRestore(args.load) QueueInputTrainer(config).train()
# Script entry logic: pick the environment, then either run a restored model
# ('play' / 'eval') or prepare a multi-GPU training configuration.
ENV_NAME = args.env
# The player is created and immediately dropped; per the original comment
# this call is what sets NUM_ACTIONS.
p = get_player()
del p  # set NUM_ACTIONS

# Restrict the visible CUDA devices only when a GPU list was supplied.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    # Any task other than training must come with a model to load.
    assert args.load is not None
    cfg = PredictConfig(
        model=Model(),
        session_init=SaverRestore(args.load),
        input_var_names=['state'],
        output_var_names=['logits:0'])
    if args.task == 'play':
        play_model(cfg)
    elif args.task == 'eval':
        eval_model_multithread(cfg, EVAL_EPISODE)
else:
    nr_gpu = get_nr_gpu()
    # Slice bounds must be integers: use floor division (`//`) so this works
    # on Python 3, where `-nr_gpu / 2` is a float and slicing with it raises
    # TypeError. For ints, `-nr_gpu // 2` matches what Python 2's `/` gave.
    # `list(range(...))` keeps the towers as real lists on Python 3 as well.
    if nr_gpu > 1:
        # The last half of the GPUs (rounded up) serve predictions.
        predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    config = get_config()
    # Optionally resume training from an existing checkpoint.
    if args.load:
        config.session_init = SaverRestore(args.load)
    # The remaining GPUs train; fall back to GPU 0 when that slice is empty
    # (nr_gpu of 0 or 1).
    config.tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
    logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, config.tower)), ','.join(map(str, predict_tower))))
# Script entry logic: report the environment, then dispatch on args.task.
ENV_NAME = args.env
logger.info("Environment Name: {}".format(ENV_NAME))
# A player is created only to read the size of its action space.
NUM_ACTIONS = get_player().get_action_space().num_actions()
logger.info("Number of actions: {}".format(NUM_ACTIONS))

# Restrict the visible CUDA devices only when a GPU list was supplied.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    # Every non-training task restores a model, so --load is mandatory here.
    assert args.load is not None
    cfg = PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state'],
        output_names=['policy'],
    )
    if args.task == 'play':
        play_model(
            cfg,
            get_player(viz=None),
            args.env,
            args.num_heads,
            args.vis_load_dir,
        )
    elif args.task == 'eval':
        eval_model_multithread(cfg, args.episode, get_player)
    elif args.task == 'gen_submit':
        # Play args.episode episodes with a non-training player that is
        # given args.output as its dump directory.
        play_n_episodes(
            get_player(train=False, dumpdir=args.output),
            OfflinePredictor(cfg),
            args.episode,
        )
        # gym.upload(output, api_key='xxx')
else:
    # Training logs go under train_log/train-atari-<ENV_NAME>.
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    nr_gpu = get_nr_gpu()
    trainer = QueueInputTrainer
    if nr_gpu > 0:
        if nr_gpu > 1:
            # The last half of the GPUs (rounded up) is picked for prediction.
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
# Command-line entry logic: parse options, then train, play or evaluate.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')  # nargs='*' in multi mode
parser.add_argument('--load', help='load model')
parser.add_argument('--task', help='task to perform',
                    choices=['play', 'eval', 'train'], default='train')
parser.add_argument('--rom', help='atari rom', required=True)
args = parser.parse_args()

# Restrict the visible CUDA devices only when a GPU list was supplied.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# 'play' and 'eval' restore a saved model, so --load is mandatory for them.
# Reported through argparse rather than `assert`, which is stripped under
# `python -O` and otherwise yields a bare traceback instead of a usage error.
if args.task != 'train' and args.load is None:
    parser.error('--load is required when --task is not train')

ROM_FILE = args.rom

if args.task != 'train':
    # Inference configuration shared by the 'play' and 'eval' tasks.
    cfg = PredictConfig(
        model=Model(),
        session_init=SaverRestore(args.load),
        input_var_names=['state'],
        output_var_names=['fct/output:0'])
    if args.task == 'play':
        play_model(cfg)
    elif args.task == 'eval':
        eval_model_multithread(cfg, EVAL_EPISODE)
else:
    config = get_config()
    # Optionally resume training from an existing checkpoint.
    if args.load:
        config.session_init = SaverRestore(args.load)
    QueueInputTrainer(config).train()
# Script entry logic: report the environment, then dispatch on args.task.
logger.info("Environment Name: {}".format(ENV_NAME))
# A player built with connection=None is used only to read the size of its
# action space.
NUM_ACTIONS = get_player(connection=None).get_action_space().num_actions()
logger.info("Number of actions: {}".format(NUM_ACTIONS))

# Restrict the visible CUDA devices only when a GPU list was supplied.
if args.gpu:
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

if args.task != 'train':
    # Every non-training task restores a model, so --load is mandatory here.
    assert args.load is not None
    cfg = PredictConfig(
        model=Model(),
        session_init=get_model_loader(args.load),
        input_names=['state'],
        output_names=['policy'],
    )
    if args.task == 'play':
        play_model(
            cfg,
            get_player(connection=(args.ip, args.port), viz=0.01),
        )
        # Manual connection check, left disabled:
        # pl = get_player(connection=(args.ip, args.port))
        # pl.action(0)
        # print(pl.current_state())
    elif args.task == 'eval':
        eval_model_multithread(cfg, args.episode, get_player)
    elif args.task == 'gen_submit':
        # Play args.episode episodes with a non-training player that is
        # given args.output as its dump directory.
        play_n_episodes(
            get_player(connection=(args.ip, args.port),
                       train=False,
                       dumpdir=args.output),
            OfflinePredictor(cfg),
            args.episode,
        )
        # gym.upload(output, api_key='xxx')
else:
    # Training logs go under train_log/train-unity3d-<ENV_NAME>.
    dirname = os.path.join('train_log', 'train-unity3d-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)
    config = get_config()