Example #1
0
                        choices=['DQN', 'Double', 'Dueling'],
                        default='Double')
    args = parser.parse_args()

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if args.task != 'train':
        assert args.load is not None
    ROM_FILE = args.rom
    METHOD = args.algo

    # set num_actions
    pl = AtariPlayer(ROM_FILE, viz=False)
    NUM_ACTIONS = pl.get_action_space().num_actions()
    del pl

    if args.task != 'train':
        cfg = PredictConfig(model=Model(),
                            session_init=get_model_loader(args.load),
                            input_names=['state'],
                            output_names=['Qvalue'])
        if args.task == 'play':
            play_model(cfg, get_player(viz=0.01))
        elif args.task == 'eval':
            eval_model_multithread(cfg, EVAL_EPISODE, get_player)
    else:
        config = get_config()
        if args.load:
            config.session_init = SaverRestore(args.load)
        QueueInputTrainer(config).train()
Example #2
0
    ENV_NAME = args.env
    p = get_player(); del p    # set NUM_ACTIONS

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if args.task != 'train':
        assert args.load is not None

    if args.task != 'train':
        cfg = PredictConfig(
                model=Model(),
                session_init=SaverRestore(args.load),
                input_var_names=['state'],
                output_var_names=['logits:0'])
        if args.task == 'play':
            play_model(cfg)
        elif args.task == 'eval':
            eval_model_multithread(cfg, EVAL_EPISODE)
    else:
        nr_gpu = get_nr_gpu()
        if nr_gpu > 1:
            predict_tower = range(nr_gpu)[-nr_gpu/2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        config = get_config()
        if args.load:
            config.session_init = SaverRestore(args.load)
        config.tower = range(nr_gpu)[:-nr_gpu/2] or [0]
        logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, config.tower)), ','.join(map(str, predict_tower))))
Example #3
0
    ENV_NAME = args.env
    logger.info("Environment Name: {}".format(ENV_NAME))
    NUM_ACTIONS = get_player().get_action_space().num_actions()
    logger.info("Number of actions: {}".format(NUM_ACTIONS))

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        assert args.load is not None
        cfg = PredictConfig(model=Model(),
                            session_init=get_model_loader(args.load),
                            input_names=['state'],
                            output_names=['policy'])
        if args.task == 'play':
            play_model(cfg, get_player(viz=None), args.env, args.num_heads,
                       args.vis_load_dir)
        elif args.task == 'eval':
            eval_model_multithread(cfg, args.episode, get_player)
        elif args.task == 'gen_submit':
            play_n_episodes(get_player(train=False, dumpdir=args.output),
                            OfflinePredictor(cfg), args.episode)
            # gym.upload(output, api_key='xxx')
    else:
        dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
        logger.set_logger_dir(dirname)

        nr_gpu = get_nr_gpu()
        trainer = QueueInputTrainer
        if nr_gpu > 0:
            if nr_gpu > 1:
                predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
Example #4
0
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
    parser.add_argument('--load', help='load model')
    parser.add_argument('--task', help='task to perform',
            choices=['play', 'eval', 'train'], default='train')
    parser.add_argument('--rom', help='atari rom', required=True)
    args = parser.parse_args()

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if args.task != 'train':
        assert args.load is not None
    ROM_FILE = args.rom

    if args.task != 'train':
        cfg = PredictConfig(
                model=Model(),
                session_init=SaverRestore(args.load),
                input_var_names=['state'],
                output_var_names=['fct/output:0'])
        if args.task == 'play':
            play_model(cfg)
        elif args.task == 'eval':
            eval_model_multithread(cfg, EVAL_EPISODE)
    else:
        config = get_config()
        if args.load:
            config.session_init = SaverRestore(args.load)
        QueueInputTrainer(config).train()

    logger.info("Environment Name: {}".format(ENV_NAME))
    NUM_ACTIONS = get_player(connection=None).get_action_space().num_actions()
    logger.info("Number of actions: {}".format(NUM_ACTIONS))

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.task != 'train':
        assert args.load is not None
        cfg = PredictConfig(
            model=Model(),
            session_init=get_model_loader(args.load),
            input_names=['state'],
            output_names=['policy'])
        if args.task == 'play':
            play_model(cfg, get_player(connection=(args.ip, args.port), viz=0.01))
            # pl = get_player(connection=(args.ip, args.port))
            # pl.action(0)
            # print(pl.current_state())
        elif args.task == 'eval':
            eval_model_multithread(cfg, args.episode, get_player)
        elif args.task == 'gen_submit':
            play_n_episodes(
                get_player(connection=(args.ip, args.port), train=False, dumpdir=args.output),
                OfflinePredictor(cfg), args.episode)
            # gym.upload(output, api_key='xxx')
    else:
        dirname = os.path.join('train_log', 'train-unity3d-{}'.format(ENV_NAME))
        logger.set_logger_dir(dirname)

        config = get_config()