Esempio n. 1
0
def build_env(args):
    """Construct the training environment selected by *args*.

    Multi-agent gridworld types are built by dedicated makers; atari/retro
    types get algorithm-specific wrapping; everything else becomes a vec env
    backed by a single-threaded TF session.
    """
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total //= 2
    n_envs = args.num_env or cpu_total
    algorithm = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    # All custom multi-agent makers share one call signature.
    custom_makers = {
        'threepass': make_m_three_pass_env,
        'pass': make_multi_pass_env,
        'x_pass': make_x_pass_env,
        'leftward': make_m_leftward_env,
        'island': make_m_island_env,
        'x_island': make_m_x_island_env,
        'pushball': make_m_pushball_env,
    }

    if env_type in custom_makers:
        env = custom_makers[env_type](env_id, env_type, n_envs, seed, args)
    elif env_type in {'atari', 'retro'}:
        if algorithm == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif algorithm == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)  # stack 4 frames for pixel observations
    else:
        # One thread per TF op pool; allow ops to fall back across devices.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=algorithm not in {'her'})

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 2
0
def build_env(num_env, alg, seed, env_type, env_id, reward_scale, gamestate=None):
    """Build a (possibly vectorized) environment.

    Args:
        num_env: number of parallel envs; falls back to the CPU count
            (halved on macOS) when falsy.
        alg: algorithm name — selects algorithm-specific wrapping.
        seed: RNG seed forwarded to the env makers.
        env_type: 'atari', 'retro', 'mujoco', or another gym family.
        env_id: gym/retro environment id.
        reward_scale: multiplicative reward scaling.
        gamestate: optional retro game state.

    Returns:
        The wrapped environment.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = num_env or ncpu

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=gamestate, reward_scale=reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        # BUG FIX: the config was previously built but never used — install it
        # as the default TF session (matching the sibling build_env variants)
        # so the threading/GPU-growth settings actually take effect.
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, num_env or 1, seed, reward_scale=reward_scale, flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env
Esempio n. 3
0
def build_env(args):
    """Create the environment configured by *args* (behavior unchanged)."""
    cpus = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpus //= 2
    n_envs = args.num_env or cpus
    algorithm = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if algorithm == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif algorithm == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = VecFrameStack(
                make_vec_env(env_id, env_type, n_envs, seed,
                             gamestate=args.gamestate,
                             reward_scale=args.reward_scale),
                4)
    else:
        # Single-threaded TF, GPU memory capped at 20% with lazy growth.
        gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.20)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1,
                                gpu_options=gpu_opts)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=algorithm not in {'her'})

        normalize_value = args.normalize_value
        if normalize_value and env_type in ('mujoco', 'roboschool'):
            env = VecNormalize(env)

    return env
Esempio n. 4
0
def build_env(args):
    """Assemble the environment requested on the command line."""
    cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cores //= 2
    parallel = args.num_env or cores
    algorithm, seed = args.alg, args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type not in {'atari', 'retro'}:
        # Generic gym env: single-threaded TF session + vectorized env.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)
        if env_type == 'mujoco':
            env = VecNormalize(env)
        return env

    # Pixel-based envs get algorithm-specific wrapping.
    if algorithm == 'deepq':
        return make_env(env_id, env_type, seed=seed,
                        wrapper_kwargs={'frame_stack': True})
    if algorithm == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    env = make_vec_env(env_id, env_type, parallel, seed,
                       gamestate=args.gamestate,
                       reward_scale=args.reward_scale)
    return VecFrameStack(env, 4)
Esempio n. 5
0
def build_env(args, extra_args):
    """Build the Atari training environment described by *args*.

    Args:
        args: parsed CLI namespace (alg, seed, num_env, env, ...).
        extra_args: dict of extra options; an optional 'weights' entry is
            forwarded to ``make_vec_env``.

    Returns:
        The wrapped environment.

    Raises:
        ValueError: if the resolved env type is not 'atari'.  Previously any
        other type fell through every branch and crashed with
        UnboundLocalError at the final ``return env``.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0  # per-worker Monitor log
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type != 'atari':
        raise ValueError(
            'build_env only supports atari env types, got {!r}'.format(env_type))

    if alg == 'acer':
        env = make_vec_env(env_id, env_type, nenv, seed)
    elif alg == 'deepq':
        env = atari_wrappers.make_atari(env_id, None)
        env.seed(seed)
        env = bench.Monitor(env, logger.get_dir())
        env = atari_wrappers.wrap_deepmind(env, frame_stack=True)
    elif alg == 'trpo_mpi':
        env = atari_wrappers.make_atari(env_id, None)
        env.seed(seed)
        env = bench.Monitor(
            env,
            logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
        env = atari_wrappers.wrap_deepmind(env)
        # TODO check if the second seeding is necessary, and eventually remove
        env.seed(seed)
    else:
        frame_stack_size = 4
        weights = extra_args.get('weights')  # optional pretrained weights
        env = VecFrameStack(
            make_vec_env(env_id, env_type, nenv, seed, weights=weights),
            frame_stack_size)
    return env
Esempio n. 6
0
def build_env(args):
    """Build the environment; for mujoco, optionally restore a saved model.

    When ``args`` carries a ``load_path``, trainable TF variables are
    restored from the joblib checkpoint at that path and VecNormalize's
    cached running statistics are re-synchronized afterwards.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

            # Sgillen: added guillaume's fix for the vec normalize loading
            # ==============================================================================

            # Idiomatic membership test (was `args.__contains__('load_path')`).
            if 'load_path' in args:
                sess = get_session()
                loaded_params = joblib.load(osp.expanduser(args.load_path))
                restores = [v.assign(loaded_params[v.name])
                            for v in tf.trainable_variables()]
                sess.run(restores)
                # Refresh VecNormalize's cached mean/var/count after restore.
                env.ob_rms._set_mean_var_count()
                env.ret_rms._set_mean_var_count()

    return env
Esempio n. 7
0
def build_env(args):
    """Build the training environment described by *args*.

    Dispatches on the env type resolved from ``args.env``: atari games get
    algorithm-specific wrapping, retro games get a monitored deepmind-style
    wrapper, and everything else becomes a vec env (normalized for mujoco).
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    # MPI rank is used only to give each worker its own Monitor log file.
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            # Each MPI worker logs to its own rank-suffixed Monitor file.
            env = bench.Monitor(
                env,
                logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_vec_env(env_id, env_type, nenv, seed),
                                frame_stack_size)

    elif env_type == 'retro':
        # Imported lazily so non-retro runs don't require the retro package.
        import retro
        gamestate = args.gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=args.env,
            state=gamestate,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    else:
        # Single-threaded TF session; soft placement lets ops fall back
        # to another device when the requested one is unavailable.
        get_session(
            tf.ConfigProto(allow_soft_placement=True,
                           intra_op_parallelism_threads=1,
                           inter_op_parallelism_threads=1))

        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale)

        if env_type == 'mujoco':
            # Wrap mujoco envs in VecNormalize, as the sibling builders do.
            env = VecNormalize(env)

    return env
Esempio n. 8
0
def build_env(args, train=True):
    """Build the env; *train* selects the configured env count vs a single env."""
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total //= 2
    algorithm = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type == 'atari':
        if algorithm == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        count = (args.num_env or cpu_total) if train else 1
        env = make_vec_env(env_id, env_type, count, seed,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    count = (args.num_env or 1) if train else 1
    return make_vec_env(env_id, env_type, count, seed,
                        reward_scale=args.reward_scale,
                        flatten_dict_observations=algorithm not in {'her'})
Esempio n. 9
0
def build_env(args):
    """Behavior-identical environment builder (restyled)."""
    workers = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        workers //= 2
    n_envs = args.num_env or workers
    algorithm, seed = args.alg, args.seed

    env_type, env_id = get_env_type(args)

    pixel_env = env_type in {'atari', 'retro'}
    if pixel_env and algorithm == 'deepq':
        env = make_env(env_id, env_type, seed=seed,
                       wrapper_kwargs={'frame_stack': True})
    elif pixel_env and algorithm == 'trpo_mpi':
        env = make_env(env_id, env_type, seed=seed)
    elif pixel_env:
        env = VecFrameStack(
            make_vec_env(env_id, env_type, n_envs, seed,
                         gamestate=args.gamestate,
                         reward_scale=args.reward_scale),
            4)
    else:
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        intra_op_parallelism_threads=1,
                                        inter_op_parallelism_threads=1)
        session_config.gpu_options.allow_growth = True
        get_session(config=session_config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=algorithm not in {'her'})
        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env
def build_env(args):
    """Build the env without creating a TF session (restyled variant)."""
    n_cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        n_cores //= 2
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in ('atari', 'retro'):
        builders = {
            'deepq': lambda: make_env(env_id, env_type, seed=seed,
                                      wrapper_kwargs={'frame_stack': True}),
            'trpo_mpi': lambda: make_env(env_id, env_type, seed=seed),
        }
        builder = builders.get(args.alg)
        if builder is not None:
            return builder()
        env = make_vec_env(env_id, env_type, args.num_env or n_cores, seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=args.alg not in {'her'})
    return VecNormalize(env) if env_type == 'mujoco' else env
Esempio n. 11
0
def build_env(args):
    """Build the environment; optional env ctor kwargs come from a JSON file.

    If ``args.env_json`` is set, the file is parsed as JSON and the resulting
    dict is forwarded to the env constructor via
    ``make_vec_env(env_kwargs=...)``; the keys must correspond to the env's
    ``__init__`` arguments.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        # Collect the optional env ctor kwargs once instead of duplicating
        # the make_vec_env call in both branches.
        extra = {}
        if args.env_json:
            with open(args.env_json) as f:
                # json.load reads the file handle directly
                # (was json.loads(f.read())).
                extra['env_kwargs'] = json.load(f)
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           **extra)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 12
0
def build_env(args, silent_monitor, prio_args=None):
    """Build the env; *prio_args* selects the prioritized wrapper variants."""
    cpu_n = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_n //= 2
    parallel = args.num_env or cpu_n
    algorithm = args.alg
    seed = args.seed
    prioritized = prio_args is not None

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if algorithm == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif algorithm == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, parallel, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale,
                               prio_args=prio_args,
                               silent_monitor=silent_monitor)
            # Pick the frame-stack wrapper matching the prioritization mode.
            stack_cls = PrioVecFrameStack if prioritized else VecFrameStack
            env = stack_cls(env, 4)

            # TODO prio vec frame stack
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        chosen = args.num_env if prioritized else args.n_active_envs
        env = make_vec_env(env_id, env_type, chosen or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=algorithm not in {'her'},
                           prio_args=prio_args,
                           silent_monitor=silent_monitor)

        if env_type == 'mujoco':
            norm_cls = PrioVecNormalize if prioritized else VecNormalize
            env = norm_cls(env)

    return env
Esempio n. 13
0
def build_env(args, normalize_ob=True, is_eval=False):
    """Build the env; for mujoco, VecNormalize honors normalize_ob/is_eval."""
    machine_cpus = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        machine_cpus //= 2
    env_count = args.num_env or machine_cpus
    algorithm = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type not in {'atari', 'retro'}:
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1)
        tf_config.gpu_options.allow_growth = True
        get_session(config=tf_config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=algorithm not in {'her'})

        if env_type == 'mujoco':
            logger.log('build_env: normalize_ob', normalize_ob)
            env = VecNormalize(env, ob=normalize_ob,
                               is_training=not is_eval, use_tf=True)
        return env

    if algorithm == 'deepq':
        return make_env(env_id, env_type, seed=seed,
                        wrapper_kwargs={'frame_stack': True})
    if algorithm == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    env = make_vec_env(env_id, env_type, env_count, seed,
                       gamestate=args.gamestate,
                       reward_scale=args.reward_scale)
    return VecFrameStack(env, 4)
Esempio n. 14
0
def build_env(args, extra_args):
    """Build an atari/retro, custom behavioral-sim, or generic gym env.

    For ``env_type == 'custom'``, *extra_args* must supply 'step_size',
    'action_space', 'one_day', and 'energy_in_state'; a missing key raises
    KeyError.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            # 'hour' selects the hourly sim; anything else falls back to the
            # daily sim (with a warning for unrecognized values).
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
        except KeyError as e:
            # Re-raised with a friendlier message naming the missing key.
            raise KeyError("You didn't specify", e.args[0], "as an argument. Please do. or change the code.")

        # wrap it
        #timestamp = datetime.now().strftime('_%m_%d_%Y_%H_%M')
        #log_file = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "logs", timestamp)
        logger_dir = logger.get_dir()
        # hard coded mpi_rank and subrank to 0
        env = Monitor(env,
                      logger_dir and os.path.join(logger_dir, "0.0"),
                      allow_early_resets=True)
        # Single-env vec wrapper so downstream code sees a VecEnv interface.
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 15
0
def build_env(args):
    """Build the env, optionally wrapping it with a learned custom reward.

    The env name (lowercased id up to "NoFrameskip") is used for masking in
    the custom-reward wrapper.  Note that, unlike sibling variants, the
    mujoco VecNormalize wrapping here happens AFTER any custom reward
    wrapper is applied.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    print(env_id)
    #extract the agc_env_name
    noskip_idx = env_id.find("NoFrameskip")
    env_name = env_id[:noskip_idx].lower()
    print("Env Name for Masking:", env_name)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
       config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
       config.gpu_options.allow_growth = True
       get_session(config=config)

       env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env,VecEnv) or isinstance(env,VecEnvWrapper)

        # SECURITY: eval() on a CLI-supplied string executes arbitrary code —
        # acceptable only for trusted local use; consider ast.literal_eval.
        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'pytorch':
            if args.custom_reward_path == '':
                assert False, 'no path for reward model'
            else:
                env = W.VecPyTorchAtariReward(env, args.custom_reward_path, env_name)
        else:
            assert False, 'no such wrapper exist'

    if env_type == 'mujoco':
        env = VecNormalize(env)
    # if env_type == 'atari':
    #     input("Normalizing for ATari game: okay? [Enter]")
    #     #normalize rewards but not observations for atari
    #     env = VecNormalizeRewards(env)

    return env
Esempio n. 16
0
def build_env(args, seed):
    """Build train (and, for non-pixel envs, eval) environments.

    Returns:
        (env, sess, evalenv): ``sess`` and ``evalenv`` are only created on
        the non-atari/retro path; they are ``None`` otherwise.  Previously
        the atari/retro path crashed with NameError at the return statement
        because neither name was ever bound there.
    """
    nenv = 1
    alg = args.alg
    # NOTE(review): the incoming `seed` argument is deliberately discarded
    # and replaced with a random value here — confirm this is intended.
    seed = int(np.random.rand(1) * 101000)
    print(seed)
    sess = None      # only created for non-pixel env types
    evalenv = None   # likewise

    env_type, env_id = get_env_type(args.env)
    set_global_seeds(seed)
    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        sess = tf.InteractiveSession()

        # NOTE(review): `args.numenv` — possibly a typo for `args.num_env`;
        # kept as-is to avoid changing the expected CLI surface.
        env = make_vec_env(env_id,
                           env_type,
                           args.numenv,
                           seed,
                           reward_scale=args.reward_scale)
        evalenv = make_vec_env(env_id,
                               env_type,
                               args.numenv,
                               seed,
                               reward_scale=args.reward_scale)

    return env, sess, evalenv
Esempio n. 17
0
def build_env(args, game_name, method_name, tag):
    """Build the environment, writing deepq test logs under a per-run dir.

    Args:
        args: parsed CLI namespace (alg, seed, num_env, ...).
        game_name, method_name, tag: components of the deepq Monitor log
            directory ``test_logs_{tag}/{game_name}/{method_name}``.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            logger_dir_path = 'test_logs_{}/{}/{}'.format(
                tag, game_name, method_name)
            # exist_ok avoids the check-then-create race of the previous
            # os.path.exists() / os.makedirs() pair.
            os.makedirs(logger_dir_path, exist_ok=True)
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           logger_dir=logger_dir_path,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 18
0
def build_env(args):
    """Build the environment, controlling reward clipping per run mode."""
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    play = args.play
    mode = args.mode
    multiplayer = args.multiplayer
    env_type, env_id = get_env_type(args)
    is_space_invaders = "SpaceInvaders" in args.env
    if env_type in {'atari', 'retro'}:
        # deepq should be the only algorithm used here
        if alg == 'deepq':
            # Rewards stay unclipped when playing (to see the real score)
            # and in Space Invaders multiplayer (handled manually later);
            # otherwise keep the base algorithm's standard clipping.
            clip_rewards = not (play or (multiplayer and is_space_invaders))
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True,
                                           'clip_rewards': clip_rewards},
                           env_kwargs={'game_mode': mode})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 19
0
def build_env(args):
    """Build the training environment selected by ``args``.

    Returns a single environment for deepq/trpo_mpi on atari/retro, a
    frame-stacked vectorized environment for other atari/retro algorithms,
    and a (mujoco-normalized) vectorized environment otherwise.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # macOS counts hyper-threads; halve to approximate physical cores.
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        # The session object itself is not needed here; get_session installs
        # it as the default session for downstream code.
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 20
0
def build_env(args):
    """Build the environment, forwarding CLI args as env kwargs.

    For non-atari/retro environments every CLI argument except ``seed`` is
    passed through to the environment constructor via ``env_kwargs``.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # macOS counts hyper-threads; halve to approximate physical cores.
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        # Copy vars(args) before dropping 'seed': vars() returns the live
        # __dict__ of the namespace, so deleting from it directly (as the
        # original code did) silently mutates `args` for every later user.
        # 'seed' is removed so it won't overwrite gym.Env's seed method.
        args_dict = dict(vars(args))
        args_dict.pop("seed", None)

        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           env_kwargs=args_dict)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 21
0
def build_env(args):
    """Build the environment, printing progress markers for debugging."""
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu  # TODO: revisit the 'or ncpu' fallback
    print("ncpu = {}".format(ncpu))
    print("Nenv = {}".format(nenv))
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type not in {'atari', 'retro'}:
        print("TF config starting...")
        conf = tf.ConfigProto(allow_soft_placement=True,
                              intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)
        conf.gpu_options.allow_growth = True
        print("Get session ...")
        get_session(config=conf)

        flatten = alg not in {'her'}
        print("Starting make_vec_env")
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
        return env

    # atari / retro from here on
    if alg == 'deepq':
        return make_env(env_id, env_type, seed=seed,
                        wrapper_kwargs={'frame_stack': True})
    if alg == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    env = make_vec_env(env_id, env_type, nenv, seed,
                       gamestate=args.gamestate,
                       reward_scale=args.reward_scale)
    return VecFrameStack(env, 4)
Esempio n. 22
0
def build_env(args):
    """Build an env with DEBUG logging and per-game reward-shaping wrappers."""
    log = logging.getLogger()
    coloredlogs.install(
        level='DEBUG',
        fmt='%(asctime)s,%(msecs)03d %(filename)s[%(process)d] %(levelname)s %(message)s')
    log.setLevel(logging.DEBUG)

    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type not in {'atari', 'retro'}:
        # TODO: Ensure GPU will be used when sent to SLURM (add as a
        # command-line argument).
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1)
        tf_config.gpu_options.allow_growth = True
        get_session(config=tf_config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=alg not in {'her'})
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
    elif alg == 'deepq':
        env = make_env(env_id, env_type, seed=seed,
                       wrapper_kwargs={'frame_stack': True})
    elif alg == 'trpo_mpi':
        env = make_env(env_id, env_type, seed=seed)
    else:
        env = make_vec_env(env_id, env_type, n_envs, seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        env = VecFrameStack(env, 4)

    # Game-specific reward-shaping wrappers.
    if env_id == "MsPacmanNoFrameskip-v4":
        env = super_simple_dqn_wrapper.PacmanClearTheBoardRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    elif env_id == "FreewayNoFrameskip-v4":
        env = super_simple_dqn_wrapper.AltFreewayRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FreewayUpRewarded(env)
        env.ale.setDifficulty(1)
    elif env_id == "JamesbondNoFrameskip-v4":
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    return env
Esempio n. 23
0
def build_env(args):
    """Build a vector of n multi-agent environments."""
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total //= 2
    n_envs = args.num_env or cpu_total
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    intra_op_parallelism_threads=1,
                                    inter_op_parallelism_threads=1)
    session_config.gpu_options.allow_growth = True
    get_session(config=session_config)

    # her and maddpg consume dict observations directly; don't flatten.
    flatten = args.alg not in {'her', 'maddpg'}
    return make_vec_env(env_id,
                        env_type,
                        n_envs,
                        seed,
                        reward_scale=args.reward_scale,
                        flatten_dict_observations=flatten,
                        isMultiAgent=True)
Esempio n. 24
0
def main():
    """Train a DQN agent on CartPole-v1 and save the resulting model."""
    env = make_vec_env("CartPole-v1",
                       "classic_control",
                       1,
                       None,
                       wrapper_kwargs=None,
                       start_index=0,
                       reward_scale=1.0,
                       flatten_dict_observations=True,
                       gamestate=None)

    act = deepq.learn(env,
                      network='mlp',
                      lr=1e-3,
                      total_timesteps=100000,
                      buffer_size=50000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.02,
                      print_freq=10,
                      callback=callback)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
Esempio n. 25
0
def build_env(args):
    """Build a vectorized MujocoQuadForce-v1 environment."""
    seed = args.seed

    # Configure a TF session that grows GPU memory on demand.
    tf_conf = tf.ConfigProto(allow_soft_placement=True,
                             intra_op_parallelism_threads=1,
                             inter_op_parallelism_threads=1)
    tf_conf.gpu_options.allow_growth = True
    get_session(config=tf_conf)

    flatten = args.alg not in {'her'}

    return make_vec_env('MujocoQuadForce-v1',
                        'mujoco',
                        args.num_env or 1,
                        seed,
                        reward_scale=args.reward_scale,
                        flatten_dict_observations=flatten)
Esempio n. 26
0
def main():
    # Play back a trained TRPO policy on the pickbot gym environment.
    # NOTE(review): relies on names not defined in this function (env_id,
    # env_type, num_env, seed, modelsdir) — presumably module-level globals;
    # verify they are set before main() runs.
    # unpause Simulation so that robot receives data on all topics
    gazebo_connection.GazeboConnection().unpauseSim()
    # create node
    rospy.init_node('pickbot_gym', anonymous=True, log_level=rospy.FATAL)

    # NOTE(review): wrapper_kwargs is normally a dict of wrapper options;
    # passing the Monitor class itself looks suspicious — confirm against
    # make_vec_env's signature.
    env = make_vec_env(env_id,
                       env_type,
                       num_env,
                       seed,
                       wrapper_kwargs=Monitor,
                       start_index=0,
                       reward_scale=1.0,
                       flatten_dict_observations=True,
                       gamestate=None)

    # total_timesteps=0: no training, just load the saved model for playback.
    act = trpo_mpi.learn(env=env,
                         network='mlp',
                         total_timesteps=0,
                         load_path=modelsdir + "model")

    obs, done = env.reset(), False
    episode_rew = 0

    # Roll out episodes forever, printing the reward after each one.
    while True:
        obs, rew, done, _ = env.step(act.step(obs)[0])
        # Vectorized envs return arrays; unwrap the single-env values.
        episode_rew += rew[0] if isinstance(env, VecEnv) else rew
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            print('episode_rew={}'.format(episode_rew))
            episode_rew = 0
            obs = env.reset()
Esempio n. 27
0
    def setup_eval_env(self, env_name, seed):
        """Create a single frame-stacked atari env for evaluation.

        Rewards are left unclipped and episode-on-life-loss is disabled so
        that evaluation reflects true game scores.
        """
        special_ids = {
            "spaceinvaders": "SpaceInvadersNoFrameskip-v4",
            "mspacman": "MsPacmanNoFrameskip-v4",
            "videopinball": "VideoPinballNoFrameskip-v4",
            "beamrider": "BeamRiderNoFrameskip-v4",
        }
        default_id = env_name[0].upper() + env_name[1:] + "NoFrameskip-v4"
        env_id = special_ids.get(env_name, default_id)
        env_type = "atari"
        # env id, env type, num envs, and seed
        env = make_vec_env(env_id,
                           env_type,
                           1,
                           seed,
                           wrapper_kwargs={
                               'clip_rewards': False,
                               'episode_life': False,
                           })
        if env_type == 'atari':
            env = VecFrameStack(env, 4)

        print("env actions", env.action_space)
        return env
Esempio n. 28
0
def make_envs(
    env_name,
    num_env,
    seed,
    max_eplen,
    frame_stack_size=4,
    noop_reset=True,
    fire_reset=True,
    eval_dir: Path = None,
    use_logger=True,
    video_recorder=False):
    """Create frame-stacked atari evaluation envs, optionally recording video.

    Args:
        env_name: gym environment id.
        num_env: number of parallel environments.
        seed: RNG seed for the environments.
        max_eplen: max episode length; also used as the recorded video length.
        frame_stack_size: number of frames stacked per observation.
        noop_reset: apply a random number of no-ops on reset.
        fire_reset: press FIRE on reset (for games that require it).
        eval_dir: output directory; required when video_recorder is True.
        use_logger: attach the logger to the environments.
        video_recorder: wrap with a recorder capturing every episode.

    Returns:
        A vectorized, frame-stacked (and possibly video-recording) env.

    Raises:
        ValueError: if video_recorder is True but eval_dir is None.
    """
    if video_recorder and eval_dir is None:
        # Fail fast with a clear message instead of the TypeError that
        # `eval_dir / 'videos'` would raise below.
        raise ValueError("eval_dir must be provided when video_recorder=True")
    eval_envs = make_vec_env(
        env_name, 'atari',
        num_env=num_env, seed=seed,
        max_episode_steps=max_eplen,
        noop_reset=noop_reset,
        use_logger=use_logger,
        wrapper_kwargs={'fire_reset': fire_reset},
    )
    eval_envs = VecFrameStack(eval_envs, frame_stack_size)
    if video_recorder:
        eval_envs = VecVideoRecorder(
            eval_envs,
            str(eval_dir / 'videos'),
            record_video_trigger=lambda _: True,
            video_length=max_eplen,
        )
    return eval_envs
Esempio n. 29
0
 def build_pend_env(args, **kwargs):
     """Build a vectorized classic-control environment from parsed CLI args."""
     n_envs = args.num_env or 1
     return make_vec_env(args.env,
                         'classic_control',
                         n_envs,
                         args.seed,
                         reward_scale=args.reward_scale,
                         flatten_dict_observations=True)
Esempio n. 30
0
def build_env(args):
    """Build a vectorized environment (normalized when mujoco) for training."""
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == "darwin":
        cpu_total //= 2
    nenv = args.num_env or cpu_total
    seed = args.seed

    env_type, env_id = get_env_type(args)

    # Set up the TensorFlow session.
    sess_config = tf.ConfigProto(allow_soft_placement=True,
                                 intra_op_parallelism_threads=1,
                                 inter_op_parallelism_threads=1)
    sess_config.gpu_options.allow_growth = True
    get_session(config=sess_config)

    flatten = args.alg not in {'her'}
    # Make the environment here ~~~
    env = make_vec_env(env_id,
                       env_type,
                       args.num_env or 1,
                       seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten)
    print(" ------------- env made ----------------------")
    if env_type == 'mujoco':
        rospy.logdebug('env_type == Mujoco')
        env = VecNormalize(env, use_tf=True)

    return env
Esempio n. 31
0
def main():
    """Train TRPO on the configured game environment and optionally save it."""
    args = parse_args()

    log_formats = ['log', 'csv', 'stdout']
    if args.tensorboard:
        log_formats.append('tensorboard')

    config = parse_config(args.config)

    # Name the output directory after the config file (without extension).
    config_stem = os.path.splitext(os.path.basename(args.config))[0]
    outdir = os.path.join(args.outdir, config_stem)
    logger.configure(dir=outdir, format_strs=log_formats)

    env_type, env_id = get_env_type(GAME_ENVIRONMENT)
    env = make_vec_env(env_id, env_type, 1, args.seed)

    model = trpo_mpi.learn(env=env,
                           network=NETWORK_ARCHITECTURE,
                           total_timesteps=args.total_timesteps,
                           **config)

    env.close()

    if args.save:
        model.save(os.path.join(outdir, 'model'))