Ejemplo n.º 1
0
def build_env(args):
    """Build the training environment selected by the parsed arguments.

    For 'atari' and 'retro' environment types:
      * ``deepq`` gets a single env built with ``frame_stack`` enabled in
        ``wrapper_kwargs``;
      * ``trpo_mpi`` gets a single env with default wrappers;
      * every other algorithm gets a vectorized env of ``nenv`` copies wrapped
        in ``VecFrameStack`` with 4 stacked frames.

    For every other environment type a TensorFlow session config limited to
    one intra-op and one inter-op thread (with GPU memory growth enabled) is
    handed to ``get_session`` before a vectorized env is built; 'mujoco'
    environments are additionally wrapped in ``VecNormalize``.

    Args:
        args: parsed command-line arguments. This function reads ``num_env``,
            ``alg``, ``seed``, ``gamestate`` and ``reward_scale``; the object
            is also passed to ``get_env_type``.

    Returns:
        The constructed (and possibly wrapped) environment.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # Halve the count on macOS, but never fall to zero on a single-core
        # host: nenv == 0 would ask make_vec_env for no environments at all.
        ncpu = max(1, ncpu // 2)
    # A falsy num_env (0 / None) means "one environment per counted CPU".
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        # 'her' is the only algorithm that keeps dict observations unflattened.
        flatten_dict_observations = alg not in {'her'}
        # Unlike the Atari/Retro path, an unset num_env falls back to a single
        # environment here rather than one per CPU.
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 2
0
def build_env(args):
    """
    Build gym environment

    Only the 'atari' and 'retro' environment types are handled so far;
    vectorized environments and MuJoCo support are still to be done.

    Args:
        args: parsed command line arguments (e.g. an argparse.Namespace);
            ``num_env``, ``alg``, ``seed``, ``gamestate`` and ``reward_scale``
            are read here, and the object is passed to ``get_env_type``

    Returns:
        the environment returned by ``make_env``

    Raises:
        NotImplementedError: if the environment type is neither 'atari' nor
            'retro'
    """

    # number of cpu cores
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # for 'darwin' system halve the processor cores, but keep at least
        # one so that nenv can never become 0 on a single-core host
        ncpu = max(1, ncpu // 2)
    # set nenv = cpu if args.num_env == 0 or None
    nenv = args.num_env or ncpu
    # agent
    alg = args.alg
    # seed
    seed = args.seed
    # environment type & id
    env_type, env_id = get_env_type(args)

    if env_type in ['atari', 'retro']:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            # not used yet: reserved for the frame-stacking wrapper that the
            # pending vectorized path will need
            frame_stack_size = 4
            # NOTE(review): nenv and seed are passed positionally here; this
            # looks like a call meant for a vectorized constructor -- confirm
            # that make_env's 3rd and 4th positional parameters really are
            # the env count and the seed.
            env = make_env(env_id,
                           env_type,
                           nenv,
                           seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
            # vectorize environments (TBD)
    else:
        # make environments for MuJoCo games (TBD)
        # Fail with a clear message instead of hitting an UnboundLocalError
        # on the `return env` below.
        raise NotImplementedError(
            "build_env does not support environment type {!r} yet; "
            "only 'atari' and 'retro' are implemented".format(env_type))

    return env
Ejemplo n.º 3
0
def build_env(cloth_cfg_path=None,
              render_path=None,
              start_state_path=None,
              num_env=1,
              seed=1,
              alg='ddpg'):
    """Build a single, non-vectorized cloth environment.

    The environment type and id are both fixed to 'cloth', so none of the
    Atari/Retro or MuJoCo specific wrapping applies. A vectorized environment
    is deliberately not used for now (it was causing trouble), which is why
    the rank, reward-scale, gamestate and wrapper arguments are spelled out
    explicitly in the ``make_env`` call; these values are subject to change.

    Before the environment is created, a TensorFlow session config limited to
    one intra-op and one inter-op thread (with GPU memory growth enabled) is
    handed to ``get_session``.

    Args:
        cloth_cfg_path: forwarded unchanged to ``make_env``.
        render_path: forwarded unchanged to ``make_env``.
        start_state_path: forwarded unchanged to ``make_env``.
        num_env: currently ignored, because only one environment is built.
            Kept so existing callers keep working.
            TODO: honor it once the vectorized (``make_vec_env``) path is
            restored.
        seed: seed forwarded to ``make_env``.
        alg: algorithm name; dict observations are flattened for every
            algorithm except 'her'.

    Returns:
        The environment returned by ``make_env``.
    """
    env_type = 'cloth'
    env_id = 'cloth'

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    # 'her' is the only algorithm that keeps dict observations unflattened.
    flatten_dict_observations = alg not in {'her'}

    # Values a vectorized constructor would otherwise supply; fixed here
    # because exactly one environment is created.
    mpi_rank = 0
    subrank = 0
    reward_scale = 1.0
    gamestate = None
    wrapper_kwargs = None
    logger_dir = logger.get_dir()

    env = make_env(env_id=env_id,
                   env_type=env_type,
                   mpi_rank=mpi_rank,
                   subrank=subrank,
                   seed=seed,
                   reward_scale=reward_scale,
                   gamestate=gamestate,
                   flatten_dict_observations=flatten_dict_observations,
                   wrapper_kwargs=wrapper_kwargs,
                   logger_dir=logger_dir,
                   cloth_cfg_path=cloth_cfg_path,
                   render_path=render_path,
                   start_state_path=start_state_path)

    return env