Code Example #1
File: run_atari.py    Project: Divyankpandey/baselines
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    else:
        # an unrecognized name previously fell through to a NameError at learn()
        raise ValueError('Unknown policy {}'.format(policy))
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close()
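
Nearly every example on this page follows the same pattern: make_atari_env builds num_env parallel workers, and VecFrameStack stacks the last four frames along the channel axis. A minimal sketch of the effect on observation shapes (the environment id and seed below are illustrative, not taken from the example above):

from baselines.common.cmd_util import make_atari_env
from baselines.common.vec_env.vec_frame_stack import VecFrameStack

base = make_atari_env('PongNoFrameskip-v4', num_env=1, seed=0)
stacked = VecFrameStack(base, 4)
print(base.observation_space.shape)     # (84, 84, 1) after the DeepMind wrappers
print(stacked.observation_space.shape)  # (84, 84, 4): last four frames stacked
stacked.close()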
Code Example #2
File: run_atari.py    Project: Divyankpandey/baselines
def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    env = make_atari_env(env_id, num_cpu, seed)
    if policy == 'cnn':
        policy_fn = AcerCnnPolicy
    elif policy == 'lstm':
        policy_fn = AcerLstmPolicy
    else:
        print("Policy {} not implemented".format(policy))
        return
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), lrschedule=lrschedule)
    env.close()
Code Example #3
File: run_atari.py    Project: Divyankpandey/baselines
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()

    env = VecFrameStack(make_atari_env(env_id, 8, seed), 4)
    policy_fn = {'cnn': CnnPolicy, 'lstm': LstmPolicy, 'lnlstm': LnLstmPolicy, 'mlp': MlpPolicy}[policy]
    ppo2.learn(policy=policy_fn, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=4, log_interval=1,
        ent_coef=.01,
        lr=lambda f : f * 2.5e-4,
        cliprange=lambda f : f * 0.1,
        total_timesteps=int(num_timesteps * 1.1))
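
The lr and cliprange callables deserve a note: baselines' ppo2 evaluates them once per update with the fraction of training remaining, f = 1.0 at the first update and approaching 0.0 at the last, so both hyperparameters anneal linearly toward zero. A standalone sketch of that schedule, using the same values as the example above:

lr = lambda f: f * 2.5e-4
cliprange = lambda f: f * 0.1

for frac in (1.0, 0.5, 0.1):
    print('remaining={:.1f}  lr={:.2e}  clip={:.3f}'.format(
        frac, lr(frac), cliprange(frac)))
# remaining=1.0  lr=2.50e-04  clip=0.100
# remaining=0.5  lr=1.25e-04  clip=0.050
# remaining=0.1  lr=2.50e-05  clip=0.010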
Code Example #4
File: run_atari.py    Project: xzblueofsky/baselines
def train(env_id, num_timesteps, seed, num_cpu):
    env = VecFrameStack(make_atari_env(env_id, num_cpu, seed), 4)
    policy_fn = CnnPolicy
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1), nprocs=num_cpu)
    env.close()
Code Example #5
from arguments import get_args
from ppo_agent import ppo_agent
from baselines.common.cmd_util import make_atari_env
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from models import CNN_Net
from baselines import logger
import os

if __name__ == '__main__':
    args = get_args()
    log_path = 'logs/' + args.env_name + '/'
    os.makedirs(log_path, exist_ok=True)
    # write log information
    logger.configure(dir=log_path)
    envs = VecFrameStack(
        make_atari_env(args.env_name, args.num_workers, args.seed), 4)
    network = CNN_Net(envs.action_space.n)
    ppo_trainer = ppo_agent(envs, args, network, 'atari')
    ppo_trainer.learn()
Code Example #6
from baselines.common.cmd_util import make_atari_env
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from arguments import achieve_arguments
from a2c_agent import a2c_agent
from baselines import logger

if __name__ == '__main__':
    args = achieve_arguments()
    logger.configure(dir=args.log_dir)
    # create environments
    envs = VecFrameStack(make_atari_env(args.env_name, args.num_processes, args.seed), 4)
    trainer = a2c_agent(envs, args)
    trainer.learn()
    envs.close()
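
Code Examples #5 and #6 both route training output through baselines' logger. A minimal sketch of that API, with an illustrative directory and placeholder metric values (not taken from the snippets above):

from baselines import logger

logger.configure(dir='logs/demo')  # writes log.txt and progress.csv here by default

for update in range(3):
    logger.record_tabular('update', update)
    logger.record_tabular('mean_reward', 1.5 * update)  # placeholder metric
    logger.dump_tabular()  # flushes one row to stdout and the output files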
Code Example #7
File: run.py    Project: rgalljamov/deephop
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    if env_type == 'mujoco':
        # todo: copy paste from akhil: create session instead of getting session
        get_session(
            tf.ConfigProto(allow_soft_placement=True,
                           intra_op_parallelism_threads=1,
                           inter_op_parallelism_threads=1))

        # always using dummy environment should allow running saved models without any further changes!
        # env = DummyVecEnv([lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        if args.num_env:
            env = SubprocVecEnv([
                # bind i as a default argument: a bare closure would capture the
                # loop variable's final value, giving every worker the same seed
                lambda i=i: make_mujoco_env(env_id, seed + i if seed is not None
                                            else None, args.reward_scale)
                for i in range(args.num_env)
            ])
        else:
            env = DummyVecEnv(
                [lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        # uncommented on Akhil's advice, as it is no longer necessary because I'm normalizing the data in my environment!
        env = VecNormalize(env)

    elif env_type == 'atari':
        if alg == 'acer':
            env = make_atari_env(env_id, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env,
                                               frame_stack=True,
                                               scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(
                env,
                logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_atari_env(env_id, nenv, seed),
                                frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(
            game=args.env,
            state=gamestate,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    elif env_type == 'classic_control':

        def make_env():
            e = gym.make(env_id)
            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
            e.seed(seed)
            return e

        env = DummyVecEnv([make_env])

    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    return env
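
A hypothetical invocation of build_env follows; in the real project args comes from the command-line parser, so the SimpleNamespace below merely stands in for the parsed namespace and every field value is illustrative:

from types import SimpleNamespace

args = SimpleNamespace(
    env='BreakoutNoFrameskip-v4',  # resolves to env_type 'atari'
    alg='ppo2',                    # falls through to the VecFrameStack branch
    num_env=4,                     # four parallel workers (otherwise ncpu is used)
    seed=0,
    reward_scale=1.0,              # only read on the mujoco path
    gamestate=None,                # only read on the retro path
)
env = build_env(args)
print(env.num_envs, env.observation_space.shape)  # 4 (84, 84, 4)
env.close()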
Code Example #8
def main():
    parser, clipargs_default_all, args_default_all = arg_parser_common()
    args = parser.parse_args()

    import json
    from dotmap import DotMap
    keys_exclude = [
        'coef_predict_task', 'is_multiprocess', 'n_envs', 'eval_interval',
        'n_steps', 'n_minibatches', 'play', 'n_eval_epsiodes', 'force_write',
        'kl2clip_sharelogstd', 'policy_variance_state_dependent',
        'kl2clip_clip_clipratio', 'kl2clip_decay', 'lr', 'num_timesteps',
        'gradient_rectify', 'rectify_scale', 'kl2clip_clipcontroltype',
        'reward_scale', 'explore_additive_rate',
        'explore_additive_threshold', 'explore_timesteps', 'debug_halfcheetah',
        'name_project', 'env_pure', 'n_opt_epochs', 'coef_entropy',
        'log_interval', 'save_interval', 'save_debug', 'is_atari'
    ]
    # 'is_atari'

    #  -------------------- prepare args

    args.env_pure = args.env.split('-v')[0]

    # env_mujocos = 'InvertedPendulum,InvertedDoublePendulum,HalfCheetah,Hopper,Walker2d,Ant,Reacher,Swimmer,Humanoid'
    # env_mujocos = tools.str2list(env_mujocos)
    if not args.is_atari:
        env_type = MUJOCO
        if '-v' not in args.env:
            args.env = f'{args.env}-v2'
    else:
        env_type = ATARI
        if '-v' not in args.env:
            args.env = f'{args.env}-v4'
    tools.warn_(f'Run with setting for {env_type} task!!!!!')

    # --- set value of clipargs
    clipargs_default = clipargs_default_all[env_type]

    clipargs = clipargs_default[args.cliptype].copy()
    clipargs.update(args.clipargs)
    args.clipargs = clipargs

    # --- prepare other args
    # If any of the following args is None, it is set from the built-in defaults below
    args_default = args_default_all[env_type]
    args = DotMap(vars(args))
    print("The following args are None, so they are set to built-in defaults:")

    for argname in args_default.keys():
        if args[argname] is None:
            if args.env_pure in args_default[argname].keys():
                args[argname] = args_default[argname][args.env_pure]
            else:
                args[argname] = args_default[argname]['_default']
            print(f"{argname}={args[argname]}")
    # print( json.dumps( args.toDict(), indent='\t') )
    # exit()
    # TODO prepare_dir: rename .finish_indicator to a clearer name.
    # --- prepare dir
    import baselines
    root_dir = tools_logger.get_logger_dir('baselines', baselines, 'results')
    args = tools_logger.prepare_dirs(args,
                                     key_first='env',
                                     keys_exclude=keys_exclude,
                                     dirs_type=['log'],
                                     root_dir=root_dir)
    # --- prepare args for use
    args.cliptype = ClipType[args.cliptype]

    args.zip_dirs = ['model', 'monitor']
    for d in args.zip_dirs:
        args[f'{d}_dir'] = osp.join(args.log_dir, d)
        os.mkdir(args[f'{d}_dir'])

    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2_AdaClip import ppo2
    # from baselines.ppo2_AdaClip import ppo2_kl2clip_conservative as ppo2
    import baselines.ppo2_AdaClip.policies as plcs
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    ncpu = 1
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True
    tf.Session(config=config).__enter__()

    set_global_seeds(args.seed)
    policy = getattr(plcs, args.policy_type)

    # ------ prepare env
    # args.eval_model = args.n_eval_epsiodes > 0
    if env_type == MUJOCO:

        def make_mujoco_env(rank=0):
            def _thunk():
                env = gym.make(args.env)
                env.seed(args.seed + rank)
                env = bench.Monitor(env,
                                    os.path.join(args.log_dir, 'monitor',
                                                 str(rank)),
                                    allow_early_resets=True)
                return env

            return _thunk

        if args.n_envs == 1:
            env = DummyVecEnv([make_mujoco_env()])
        else:
            from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
            env = SubprocVecEnv(
                [make_mujoco_env(i) for i in range(args.n_envs)])
        env = VecNormalize(env, reward_scale=args.reward_scale)

        env_test = None
        if args.n_eval_epsiodes > 0:
            if args.n_eval_epsiodes == 1:
                env_test = DummyVecEnv([make_mujoco_env()])
            else:
                from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
                env_test = SubprocVecEnv(
                    [make_mujoco_env(i) for i in range(args.n_eval_epsiodes)])
            env_test = VecNormalize(
                env_test, ret=False,
                update=False)  # no need to normalize returns during evaluation
    else:
        from baselines.common.vec_env.vec_frame_stack import VecFrameStack
        from baselines.common.cmd_util import make_atari_env
        env = VecFrameStack(
            make_atari_env(args.env, num_env=args.n_envs, seed=args.seed), 4)
        env_test = None
        #  TODO : debug VecFrame
        if args.n_eval_epsiodes > 0:
            env_test = VecFrameStack(
                make_atari_env(args.env,
                               num_env=args.n_eval_epsiodes,
                               seed=args.seed), 4)
            # env_test.reset()
            # env_test.render()
    # ----------- learn
    if env_type == MUJOCO:
        lr = args.lr
        # cliprange = args.clipargs.cliprange
    elif env_type == ATARI:
        lr = lambda f: f * args.lr
        # cliprange = lambda f: f*args.clipargs.cliprange if args.clipargs.cliprange is not None else None
    args.env_type = env_type
    ppo2.learn(policy=policy,
               env=env,
               env_eval=env_test,
               n_steps=args.n_steps,
               nminibatches=args.n_minibatches,
               lam=args.lam,
               gamma=0.99,
               n_opt_epochs=args.n_opt_epochs,
               log_interval=args.log_interval,
               ent_coef=args.coef_entropy,
               lr=lr,
               total_timesteps=args.num_timesteps,
               cliptype=args.cliptype,
               save_interval=args.save_interval,
               args=args)

    tools_logger.finish_dir(args.log_dir)
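
One detail worth isolating from the example above is the default-filling loop: each argument's defaults are keyed by args.env_pure, with a '_default' fallback for environments without a specific entry. A self-contained sketch of the same pattern (argument names and values here are invented for illustration):

from dotmap import DotMap

args = DotMap({'env_pure': 'Hopper', 'lr': None, 'lam': 0.95})
args_default = {
    'lr':  {'Hopper': 3e-4, '_default': 2.5e-4},
    'lam': {'Hopper': 0.9,  '_default': 0.95},
}
for argname in args_default:
    if args[argname] is None:  # fill only values the user left unset
        per_env = args_default[argname]
        args[argname] = per_env.get(args.env_pure, per_env['_default'])
print(args.lr, args.lam)  # 0.0003 0.95 -- lam was set, so it keeps its value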
Code Example #9
def build_env(args, selector=None):
    global store
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    print(env_type, env_id, nenv, args.num_env)
    if env_type == 'mujoco':
        get_session(
            tf.ConfigProto(allow_soft_placement=True,
                           intra_op_parallelism_threads=1,
                           inter_op_parallelism_threads=1))

        if args.num_env:
            env = SubprocVecEnv([
                # bind i as a default argument: a bare closure would capture the
                # loop variable's final value, giving every worker the same seed
                lambda i=i: make_mujoco_env(env_id, seed + i if seed is not None
                                            else None, args.reward_scale)
                for i in range(args.num_env)
            ])
        else:
            env = DummyVecEnv(
                [lambda: make_mujoco_env(env_id, seed, args.reward_scale)])

        env = VecNormalize(env)

    elif env_type == 'atari':
        if alg == 'acer':
            env = make_atari_env(
                env_id, nenv, seed)  #, wrapper_kwargs={'clip_rewards': False})
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env,
                                               frame_stack=True,
                                               scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(
                env,
                logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        elif "Zelda" in env_id:
            sys.path.append(
                "/home/jupyter/Notebooks/Chang/HardRLWithYoutube/nnrunner/a2c_gvgai"
            )
            import nnrunner.a2c_gvgai.env as gvgai_env
            frame_stack_size = 4
            print("run zelda")
            env = VecFrameStack(
                gvgai_env.make_gvgai_env(env_id,
                                         nenv,
                                         seed,
                                         level_selector=selector,
                                         experiment="PE",
                                         dataset="zelda"), frame_stack_size)
            # env.reset()
            # store = env
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_atari_env(env_id, nenv, seed),
                                frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(
            game=args.env,
            state=gamestate,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    elif env_type == 'classic_control':

        def make_env():
            e = gym.make(env_id)
            e = bench.Monitor(e, logger.get_dir(), allow_early_resets=True)
            e.seed(seed)
            return e

        env = DummyVecEnv([make_env])

    else:
        raise ValueError('Unknown env_type {}'.format(env_type))

    # env.reset()
    print("build env")
    # store.reset()
    # store.reset()

    return env