Esempio n. 1
0
    # NOTE(review): fragment starts mid-scope — `parser`, `get_player`, `Model`,
    # `train`, `logger` etc. are defined outside this extract.
    # CLI options: --output is the dump/submission directory, --episode the
    # number of episodes to play/evaluate.
    parser.add_argument('--output', help='output directory for submission', default='output_dir')
    parser.add_argument('--episode', help='number of episode to eval', default=100, type=int)
    args = parser.parse_args()

    ENV_NAME = args.env
    logger.info("Environment Name: {}".format(ENV_NAME))
    # Instantiate the environment once only to read the action-space size.
    NUM_ACTIONS = get_player().action_space.n
    logger.info("Number of actions: {}".format(NUM_ACTIONS))

    # Limit visible CUDA devices before any session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        # Every non-training task restores weights from a checkpoint.
        assert args.load is not None
        pred = OfflinePredictor(PredictConfig(
            model=Model(),
            session_init=get_model_loader(args.load),
            input_names=['state'],
            output_names=['policy']))
        if args.task == 'play':
            play_n_episodes(get_player(train=False), pred,
                            args.episode, render=True)
        elif args.task == 'eval':
            eval_model_multithread(pred, args.episode, get_player)
        elif args.task == 'dump_video':
            # Episodes are written to args.output instead of rendered.
            play_n_episodes(
                get_player(train=False, dumpdir=args.output),
                pred, args.episode)
    else:
        train()
    # NOTE(review): this repeats the setup pattern above — apparently a second,
    # separate example whose "Esempio" header was lost during extraction.
    NUM_ACTIONS = get_player().action_space.n
    logger.info("Number of actions: {}".format(NUM_ACTIONS))

    # Limit visible CUDA devices before any session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        # Non-training tasks need a checkpoint to restore from.
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state'],
                          output_names=['policy']))
        if args.task == 'play':
            play_n_episodes(get_player(train=False),
                            pred,
                            args.episode,
                            render=True)
        elif args.task == 'eval':
            eval_model_multithread(pred, args.episode, get_player)
        elif args.task == 'gen_submit':
            # Dump episodes to args.output for a (formerly gym-hosted) submission.
            play_n_episodes(get_player(train=False, dumpdir=args.output), pred,
                            args.episode)
            # gym.upload(args.output, api_key='xxx')
    else:
        # Training: log under train_log/train-atari-<env>.
        dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
        logger.set_logger_dir(dirname)

        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        # NOTE(review): the line below is cut off mid-call in this extract.
        trainer = SimpleTrainer(
Esempio n. 3
0
    # NOTE(review): fragment starts mid-scope — args/IMAGE_SIZE/FRAME_HISTORY
    # and the helper functions come from outside this extract.
    ENV_NAME = args.env
    # Gym envs are anything that is not an Atari ROM (.bin) file.
    USE_GYM = not ENV_NAME.endswith('.bin')

    # set num_actions
    num_actions = get_player().action_space.n
    logger.info("ENV: {}, Num Actions: {}".format(args.env, num_actions))

    # Gym frames carry 3 color channels; ALE frames use IMAGE_SIZE as-is.
    state_shape = IMAGE_SIZE + (3, ) if USE_GYM else IMAGE_SIZE
    model = Model(state_shape, FRAME_HISTORY, args.algo, num_actions)

    if args.task != 'train':
        # play/eval restore weights from the given checkpoint.
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=model,
                          session_init=SmartInit(args.load),
                          input_names=['state'],
                          output_names=['Qvalue']))
        if args.task == 'play':
            play_n_episodes(get_player(viz=0.01), pred, 100, render=True)
        elif args.task == 'eval':
            eval_model_multithread(pred, args.num_eval, get_player)
    else:
        # Log dir is keyed on the env basename without its extension.
        logger.set_logger_dir(
            os.path.join(
                'train_log',
                'DQN-{}'.format(os.path.basename(args.env).split('.')[0])))
        config = get_config(model)
        config.session_init = SmartInit(args.load)
        launch_train_with_config(config, SimpleTrainer())
Esempio n. 4
0
 # NOTE(review): fragment begins inside an OfflinePredictor(PredictConfig(...))
 # call whose opening lines were lost in extraction; indentation is an artifact.
 session_init=get_model_loader(args.load),
 input_names=state_names,
 output_names=qvalue_names,
 )
 )
 # demo pretrained model one episode at a time
 if args.task == "play":
     play_n_episodes(
         get_player(
             files_list=args.files,
             viz=0,
             saveGif=args.saveGif,
             saveVideo=args.saveVideo,
             task="play",
             agents=args.agents,
             fiducials=args.fiducials,
             infDir=args.inferDir,
         ),
         pred,
         num_files,
         agents=args.agents,
         # Map fiducial indices to their human-readable names for output.
         fidname=[fidNumToName[i] for i in args.fiducials],
         infDir=args.inferDir,
     )
 # run episodes in parallel and evaluate pretrained model
 elif args.task == "eval":
     # NOTE(review): this call is cut off mid-argument-list in the extract.
     play_n_episodes(
         get_player(
             files_list=args.files,
             viz=0,
             saveGif=args.saveGif,
Esempio n. 5
0
    # NOTE(review): fragment starts mid-scope; args/Model/get_player etc. are
    # defined outside this extract.
    # Limit visible CUDA devices before any session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        # Non-training tasks restore from a checkpoint.
        assert args.load is not None
        cfg = PredictConfig(model=Model(),
                            session_init=get_model_loader(args.load),
                            input_names=['state'],
                            output_names=['policy'])
        if args.task == 'play':
            play_model(cfg, get_player(viz=0.01))
        elif args.task == 'eval':
            eval_model_multithread(cfg, args.episode, get_player)
        elif args.task == 'gen_submit':
            play_n_episodes(get_player(train=False, dumpdir=args.output),
                            OfflinePredictor(cfg), args.episode)
            # gym.upload(output, api_key='xxx')
    else:
        dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
        logger.set_logger_dir(dirname)

        # Split available GPUs: roughly the later half serve prediction,
        # the earlier half training (tower 0 does both when only one GPU).
        nr_gpu = get_nr_gpu()
        trainer = QueueInputTrainer
        if nr_gpu > 0:
            if nr_gpu > 1:
                predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
            else:
                predict_tower = [0]
            PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
            train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
            # NOTE(review): the call below is cut off in this extract.
            logger.info(
    # NOTE(review): another headerless fragment (its "Esempio" separator was
    # lost); variant that can save expert trajectories to an .npz file.
    NUM_ACTIONS = get_player().action_space.n
    logger.info("Environment: {}, number of actions: {}".format(
        ENV_NAME, NUM_ACTIONS))

    # Limit visible CUDA devices before any session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=SmartInit(args.load),
                          input_names=['state'],
                          output_names=['policy']))
        if args.task == 'play':
            # Hard-coded user-specific destination for the saved batch;
            # indexed by --save_id so parallel runs don't collide.
            filename = "/mnt/research/judy/reward_shaping/expert_data/batch_{}.npz".format(
                args.save_id)
            play_n_episodes(get_player(train=False),
                            pred,
                            args.episode,
                            render=args.render,
                            save=args.save,
                            filename=filename)
        elif args.task == 'eval':
            eval_model_multithread(pred, args.episode, get_player)
        elif args.task == 'dump_video':
            play_n_episodes(get_player(train=False, dumpdir=args.output), pred,
                            args.episode)
    else:
        train()
Esempio n. 7
0
    # NOTE(review): fragment starts mid-scope; args/Model/get_player/get_config
    # and EVAL_EPISODE come from outside this extract.
    # Limit visible CUDA devices before any session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    ENV_NAME = args.env
    # USE_GYM = not ENV_NAME.endswith('.bin')
    # FIX: the original line read `IMAGE_CHANNEL = 2 if USE_GYM else 2`.
    # Both branches were the same value, and USE_GYM is commented out just
    # above (so, at least in this extract, the conditional would raise
    # NameError). Assign the constant directly — value-identical either way.
    IMAGE_CHANNEL = 2
    METHOD = args.algo
    # set num_actions
    NUM_ACTIONS = get_player().action_space.n
    logger.info("ENV: {}, Num Actions: {}".format(ENV_NAME, NUM_ACTIONS))

    if args.task != 'train':
        # play/eval restore weights from the given checkpoint.
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state'],
                          output_names=['Qvalue']))
        if args.task == 'play':
            play_n_episodes(get_player(viz=0.01), pred, 100)
        elif args.task == 'eval':
            eval_model_multithread(pred, EVAL_EPISODE, get_player)
    else:
        # Log dir is keyed on the env basename without its extension.
        logger.set_logger_dir(
            os.path.join(
                'train_log',
                'DQN-{}'.format(os.path.basename(ENV_NAME).split('.')[0])))
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        launch_train_with_config(config, SimpleTrainer())
Esempio n. 8
0
        # NOTE(review): fragment starts inside a get-player-style call whose
        # opening lines were lost in extraction.
        agents=args.agents,
        reward_strategy=args.reward_strategy)
    NUM_ACTIONS = init_player.action_space.n
    # Number of input files drives how many episodes are played below.
    num_files = init_player.files.num_files

    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state'],
                          output_names=['Qvalue']))
        # demo pretrained model one episode at a time
        if args.task == 'play' or args.task == 'eval':
            # Same code path for both tasks; the player behaves differently
            # based on the task string it receives.
            play_n_episodes(
                get_player(files_list=args.files,
                           viz=0.01,
                           saveGif=args.saveGif,
                           saveVideo=args.saveVideo,
                           task=args.task,
                           agents=args.agents,
                           reward_strategy=args.reward_strategy), pred,
                num_files)
    else:  # train model
        logger_dir = os.path.join(args.logDir, args.name)
        logger.set_logger_dir(logger_dir)
        config = get_config(args.files)
        if args.load:  # resume training from a saved checkpoint
            config.session_init = get_model_loader(args.load)
        launch_train_with_config(config, SimpleTrainer())
Esempio n. 9
0
                        # --algo selects the Q-learning variant.
                        choices=['DQN', 'Double', 'Dueling'], default='Double')
    args = parser.parse_args()

    # Limit visible CUDA devices before any session is created.
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    ROM_FILE = args.rom
    METHOD = args.algo
    # set num_actions
    # Read the action-space size directly from the Atari ROM.
    NUM_ACTIONS = AtariPlayer(ROM_FILE).action_space.n
    logger.info("ROM: {}, Num Actions: {}".format(ROM_FILE, NUM_ACTIONS))

    if args.task != 'train':
        # play/eval restore weights from the given checkpoint.
        assert args.load is not None
        pred = OfflinePredictor(PredictConfig(
            model=Model(),
            session_init=get_model_loader(args.load),
            input_names=['state'],
            output_names=['Qvalue']))
        if args.task == 'play':
            play_n_episodes(get_player(viz=0.01), pred, 100)
        elif args.task == 'eval':
            eval_model_multithread(pred, EVAL_EPISODE, get_player)
    else:
        # Log dir is keyed on the ROM basename without its extension.
        logger.set_logger_dir(
            os.path.join('train_log', 'DQN-{}'.format(
                os.path.basename(ROM_FILE).split('.')[0])))
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        launch_train_with_config(config, SimpleTrainer())
Esempio n. 10
0
    # NOTE(review): fragment starts mid-scope — `init_player`, `args`,
    # `get_player` etc. are defined outside this extract.
    NUM_ACTIONS = init_player.action_space.n
    # One episode per input file.
    num_files = init_player.files.num_files

    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state'],
                          output_names=['Qvalue']))

        # demo pretrained model one episode at a time
        if args.task == 'play':
            play_n_episodes(
                get_player(files_list=args.files,
                           viz=0.01,
                           saveGif=args.saveGif,
                           saveVideo=args.saveVideo,
                           task='play'), pred, num_files)

        # run episodes in parallel and evaluate pretrained model
        elif args.task == 'eval':
            play_n_episodes(
                get_player(files_list=args.files,
                           viz=0.01,
                           saveGif=args.saveGif,
                           saveVideo=args.saveVideo,
                           task='eval'), pred, num_files)
    else:  # train model
        logger_dir = os.path.join(args.logDir, args.name)
        logger.set_logger_dir(logger_dir)
        # NOTE(review): the extract ends here; the rest of the branch is lost.
        config = get_config(args.files)
Esempio n. 11
0
 # NOTE(review): fragment with 1-space indentation (extraction artifact);
 # `args`, `state_names`, `qvalue_names`, `num_files` come from outside.
 pred = OfflinePredictor(
     PredictConfig(
         # One model shared by all agents.
         model=Model(agents=args.agents),
         session_init=get_model_loader(args.load),
         input_names=state_names,
         output_names=qvalue_names,
     ))
 # demo pretrained model one episode at a time
 if args.task == "play":
     play_n_episodes(
         get_player(
             files_list=args.files,
             viz=0.01,
             saveGif=args.saveGif,
             saveVideo=args.saveVideo,
             task="play",
             agents=args.agents,
         ),
         pred,
         num_files,
         agents=args.agents,
     )
 # run episodes in parallel and evaluate pretrained model
 elif args.task == "eval":
     # NOTE(review): this call is cut off mid-argument-list in the extract.
     play_n_episodes(
         get_player(
             files_list=args.files,
             viz=0,
             saveGif=args.saveGif,
             saveVideo=args.saveVideo,
             task="eval",
        # NOTE(review): the enclosing `if args.gpu:` header for this line was
        # lost in extraction, along with this fragment's "Esempio" separator.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        # Non-training tasks restore from a checkpoint.
        assert args.load is not None
        cfg = PredictConfig(
            model=Model(),
            session_init=get_model_loader(args.load),
            input_names=['state'],
            output_names=['policy'])
        if args.task == 'play':
            # Player connects to a remote Unity3D instance over ip:port.
            play_model(cfg, get_player(connection=(args.ip, args.port), viz=0.01))
            # pl = get_player(connection=(args.ip, args.port))
            # pl.action(0)
            # print(pl.current_state())
        elif args.task == 'eval':
            eval_model_multithread(cfg, args.episode, get_player)
        elif args.task == 'gen_submit':
            play_n_episodes(
                get_player(connection=(args.ip, args.port), train=False, dumpdir=args.output),
                OfflinePredictor(cfg), args.episode)
            # gym.upload(output, api_key='xxx')
    else:
        dirname = os.path.join('train_log', 'train-unity3d-{}'.format(ENV_NAME))
        logger.set_logger_dir(dirname)

        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        # Single tower trains via queue input; multiple towers go async.
        trainer = QueueInputTrainer if config.nr_tower == 1 else AsyncMultiGPUTrainer
        trainer(config).train()
Esempio n. 13
0
    # NOTE(review): fragment starts mid-scope; `args`, `ENV_NAME`, `init_data`,
    # `get_player`, `EVAL_EPISODE` come from outside this extract.
    # IMAGE_CHANNEL = 3 if USE_GYM else 1
    # IMAGE_CHANNEL = 5
    METHOD = args.algo
    # set num_actions
    # NUM_ACTIONS = get_player().action_space.n
    raw_list, lbl_list = init_data()
    # Action count is the product of the agent output-shape dimensions
    # rather than a gym action_space (see the commented-out line above).
    NUM_ACTIONS = np.prod(get_player().agent_out_shape)
    logger.info("ENV: {}, Num Actions: {}".format(ENV_NAME, NUM_ACTIONS))

    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state'],
                          output_names=['Qvalue']))
        if args.task == 'play':
            # play_n_episodes(get_player(viz=0.01), pred, 100)
            play_n_episodes(get_player(), pred, 100)
        elif args.task == 'eval':
            eval_model_multithread(pred, EVAL_EPISODE, get_player)
    else:
        # Log dir encodes the algorithm and the env basename.
        logger.set_logger_dir(
            os.path.join(
                'train_log', METHOD +
                '-DQN-{}'.format(os.path.basename(ENV_NAME).split('.')[0])))
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        launch_train_with_config(config, SimpleTrainer())
Esempio n. 14
0
    # NOTE(review): fragment starts mid-scope — `init_player`, `data_dir`,
    # `test_list`, `eval_list`, `logger_dir` come from outside this extract.
    NUM_ACTIONS = init_player.action_space.n
    # One episode per input file.
    num_files = init_player.files.num_files

    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state'],
                          output_names=['Qvalue']))
        # demo pretrained model one episode at a time
        if args.task == 'play':
            # Play uses the held-out test file list.
            play_n_episodes(
                get_player(directory=data_dir,
                           files_list=test_list,
                           viz=0.01,
                           saveGif=args.saveGif,
                           saveVideo=args.saveVideo,
                           task='play'), pred, num_files)
        # run episodes in parallel and evaluate pretrained model
        elif args.task == 'eval':
            # Eval uses a separate evaluation file list.
            play_n_episodes(
                get_player(directory=data_dir,
                           files_list=eval_list,
                           viz=0.01,
                           saveGif=args.saveGif,
                           saveVideo=args.saveVideo,
                           task='eval'), pred, num_files)
    else:  # train model
        logger.set_logger_dir(logger_dir)
        # NOTE(review): the extract ends here; the rest of the branch is lost.
        config = get_config()