Ejemplo n.º 1
0
def build_env(args):
    """Build the vectorized training environment described by ``args``.

    Atari/retro environments get per-algorithm wrapping; everything else
    goes through a single-threaded TF session, ``make_vec_env`` and, for
    mujoco, a TF-backed ``VecNormalize``.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        # Collect the keyword arguments once so the json/no-json call
        # sites cannot drift apart.
        vec_env_kwargs = {
            'reward_scale': args.reward_scale,
            'flatten_dict_observations': flatten_dict_observations,
        }
        if args.env_json:
            # Keys in the JSON file must match the target env's
            # __init__ arguments.
            with open(args.env_json) as f:
                vec_env_kwargs['env_kwargs'] = json.load(f)
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           **vec_env_kwargs)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 2
0
def build_env(args):
    """Construct the environment requested on the command line."""
    alg, seed = args.alg, args.seed

    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    parallel_envs = args.num_env or cpu_count

    env_type, env_id = get_env_type(args)

    if env_type not in {'atari', 'retro'}:
        should_flatten = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=should_flatten)
        if env_type == 'mujoco':
            env = VecNormalize(env)
        return env

    # Pixel-based environments: wrapping depends on the algorithm.
    if alg == 'deepq':
        return make_env(env_id, env_type, seed=seed,
                        wrapper_kwargs={'frame_stack': True})
    if alg == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    env = make_vec_env(env_id, env_type, parallel_envs, seed,
                       gamestate=args.gamestate,
                       reward_scale=args.reward_scale)
    return VecFrameStack(env, 4)
Ejemplo n.º 3
0
def build_env(args):
    """Build the environment, optionally restoring TF weights and
    VecNormalize statistics from ``args.load_path``.

    The restore path (Guillaume's fix) loads a joblib parameter dump,
    assigns every trainable variable, and re-syncs the VecNormalize
    running-mean counters.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

            # Sgillen: added guillaume's fix for the vec normalize loading
            # ==============================================================================

            # `in` delegates to Namespace.__contains__ — same behavior,
            # idiomatic spelling (no direct dunder call).
            if 'load_path' in args:
                sess = get_session()
                loaded_params = joblib.load(osp.expanduser(args.load_path))
                restores = []
                for v in tf.trainable_variables():
                    restores.append(v.assign(loaded_params[v.name]))
                sess.run(restores)
                env.ob_rms._set_mean_var_count()
                env.ret_rms._set_mean_var_count()
                # print(dir(env.ret_rms))

    return env
Ejemplo n.º 4
0
def build_env(num_env,alg,seed,env_type,env_id,reward_scale,gamestate=None):
    """Build a (vectorized) environment from explicit parameters.

    Unlike the args-namespace variants, every setting is passed in
    directly, so this version has no dependency on an argparse result.

    Args:
        num_env: number of parallel envs; 0/None falls back to cpu count
            (or 1 in the non-atari branch).
        alg: algorithm name; controls the atari/retro wrapping.
        seed: RNG seed forwarded to the env constructors.
        env_type: e.g. 'atari', 'retro', 'mujoco'.
        env_id: gym environment id.
        reward_scale: reward scaling factor for the vectorized envs.
        gamestate: optional retro game state.

    Returns:
        The constructed (and possibly wrapped) environment.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = num_env or ncpu

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=gamestate, reward_scale=reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        # NOTE(review): `config` is constructed but never used — sibling
        # variants pass it to get_session(config=config), which is absent
        # here. Confirm whether that call was dropped intentionally.
        config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, num_env or 1, seed, reward_scale=reward_scale, flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env
Ejemplo n.º 5
0
def build_env(args):
    """Create the vectorized environment specified by ``args``.

    Non-pixel envs are only normalized when ``args.normalize_value`` is
    set (mujoco or roboschool).
    """
    cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cores //= 2
    parallel = args.num_env or cores
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, parallel, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
    else:
        # Single-threaded TF ops, growable allocation, capped at 20% of
        # GPU memory so several runs can share one card.
        gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.20)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1,
                                gpu_options=gpu_opts)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=alg not in {'her'})

        if env_type in ('mujoco', 'roboschool') and args.normalize_value:
            env = VecNormalize(env)

    return env
Ejemplo n.º 6
0
def build_env(args):
    """Build the vectorized training environment selected by ``args``."""
    cpu_total = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_total //= 2
    n_parallel = args.num_env or cpu_total
    alg, seed = args.alg, args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        if alg == 'trpo_mpi':
            return make_env(env_id, env_type, seed=seed)
        stacked = make_vec_env(env_id, env_type, n_parallel, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
        return VecFrameStack(stacked, 4)

    # Non-pixel environments: configure a single-threaded TF session first.
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=alg not in {'her'})
    if env_type == 'mujoco':
        env = VecNormalize(env)
    return env
Ejemplo n.º 7
0
def build_env(args):
    """Build the environment, including the custom multi-agent maps."""
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    # The custom multi-agent environments all share one constructor
    # signature, so a dispatch table replaces the if/elif ladder.
    multi_agent_makers = {
        'threepass': make_m_three_pass_env,
        'pass': make_multi_pass_env,
        'x_pass': make_x_pass_env,
        'leftward': make_m_leftward_env,
        'island': make_m_island_env,
        'x_island': make_m_x_island_env,
        'pushball': make_m_pushball_env,
    }
    maker = multi_agent_makers.get(env_type)
    if maker is not None:
        return maker(env_id, env_type, nenv, seed, args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        if alg == 'trpo_mpi':
            return make_env(env_id, env_type, seed=seed)
        env = make_vec_env(env_id, env_type, nenv, seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    # Everything else: single-threaded TF session, then a vec env.
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=alg not in {'her'})
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=True)
    return env
Ejemplo n.º 8
0
def build_env(args):
    """Return the environment built from the parsed CLI arguments."""
    cpus = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpus //= 2
    n_envs = args.num_env or cpus
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            # Stack the last four frames of a vectorized env.
            env = VecFrameStack(
                make_vec_env(env_id, env_type, n_envs, seed,
                             gamestate=args.gamestate,
                             reward_scale=args.reward_scale),
                4)
    else:
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        intra_op_parallelism_threads=1,
                                        inter_op_parallelism_threads=1)
        session_config.gpu_options.allow_growth = True
        get_session(config=session_config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env
Ejemplo n.º 9
0
def build_env(args, normalize_ob=True, is_eval=False):
    """Build the training (or evaluation) environment.

    Args:
        args: parsed CLI namespace (alg, seed, num_env, gamestate, ...).
        normalize_ob: whether VecNormalize should normalize observations
            (mujoco branch only).
        is_eval: when True, VecNormalize runs with is_training=False so
            its statistics are frozen during evaluation.

    Returns:
        The constructed (and possibly wrapped) environment.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        # One intra/inter-op thread keeps TF from oversubscribing the CPU.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}

        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            logger.log('build_env: normalize_ob', normalize_ob)
            env = VecNormalize(env,
                               ob=normalize_ob,
                               is_training=not is_eval,
                               use_tf=True)
    return env
Ejemplo n.º 10
0
def build_env(args, extra_args):
    """Build the environment; ``extra_args`` configures the "custom" sims.

    For env_type "custom", ``extra_args`` must supply "step_size",
    "action_space", "one_day" and "energy_in_state"; a missing key is
    re-raised as a KeyError naming the absent argument.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            step_size = extra_args["step_size"]
            # Both sims take identical constructor arguments; build the
            # kwargs once so the branches cannot drift apart.
            sim_kwargs = dict(
                action_space_string=extra_args["action_space"],
                one_day=extra_args["one_day"],
                energy_in_state=extra_args["energy_in_state"])
            if step_size == "hour":
                env = custom_envs.HourlySimEnv(**sim_kwargs)
            else:
                if step_size != "day":
                    print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(**sim_kwargs)
        except KeyError as e:
            raise KeyError("You didn't specify", e.args[0], "as an argument. Please do. or change the code.")

        # Wrap with a Monitor (mpi_rank and subrank hard-coded to 0) and
        # a single-env DummyVecEnv.
        logger_dir = logger.get_dir()
        env = Monitor(env,
                      logger_dir and os.path.join(logger_dir, "0.0"),
                      allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 11
0
def build_env(args, silent_monitor, prio_args=None):
    """Build the environment, with optional prioritized-env wrappers.

    Args:
        args: parsed CLI namespace (alg, seed, num_env, n_active_envs, ...).
        silent_monitor: forwarded to the env factories to suppress
            monitor output.
        prio_args: when not None, the Prio* wrapper variants are used
            instead of the standard ones.

    Returns:
        The constructed (and possibly wrapped) environment.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale,
                               prio_args=prio_args,
                               silent_monitor=silent_monitor)
            # Plain frame stacking without prioritization; the Prio
            # variant otherwise.
            if prio_args is None:
                env = VecFrameStack(env, frame_stack_size)
            else:
                env = PrioVecFrameStack(env, frame_stack_size)

            # TODO prio vec frame stack

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        # Without prioritization only the "active" envs are created.
        num_env = args.n_active_envs if prio_args is None else args.num_env
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           prio_args=prio_args,
                           silent_monitor=silent_monitor)

        if env_type == 'mujoco':
            if prio_args is None:
                env = VecNormalize(env)
            else:
                env = PrioVecNormalize(env)

    return env
Ejemplo n.º 12
0
def build_env(args):
    """Build the environment, optionally wrapping it with a learned
    (PyTorch) custom reward model.

    The NoFrameskip prefix of ``env_id`` is extracted as the short game
    name used by the reward-model masking code.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    print(env_id)
    #extract the agc_env_name
    noskip_idx = env_id.find("NoFrameskip")
    env_name = env_id[:noskip_idx].lower()
    print("Env Name for Masking:", env_name)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
       config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
       config.gpu_options.allow_growth = True
       get_session(config=config)

       env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env,VecEnv) or isinstance(env,VecEnvWrapper)

        # SECURITY NOTE(review): eval() executes arbitrary code from the
        # command line — acceptable only for trusted local use; consider
        # ast.literal_eval for plain literals.
        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'pytorch':
            if args.custom_reward_path == '':
                assert False, 'no path for reward model'
            else:
                env = W.VecPyTorchAtariReward(env, args.custom_reward_path, env_name)
        else:
            assert False, 'no such wrapper exist'

    # NOTE(review): unlike the sibling variants, this check is outside the
    # else-branch, so it also runs after the atari/retro path — confirm
    # that is intentional.
    if env_type == 'mujoco':
        env = VecNormalize(env)
    # if env_type == 'atari':
    #     input("Normalizing for ATari game: okay? [Enter]")
    #     #normalize rewards but not observations for atari
    #     env = VecNormalizeRewards(env)

    return env
Ejemplo n.º 13
0
def build_env(args, seed):
    """Build a training env, an eval env, and an InteractiveSession.

    NOTE(review): the ``seed`` parameter is immediately overwritten with
    a random value below — the argument is effectively ignored; confirm
    whether that is intended.
    """
    nenv = 1
    alg = args.alg
    # seed = args.seed
    # Deliberately re-randomize the seed on every call (see print below).
    seed = int(np.random.rand(1) * 101000)
    print(seed)

    env_type, env_id = get_env_type(args.env)
    set_global_seeds(seed)
    if env_type in {'atari', 'retro'}:
        # NOTE(review): `sess` and `evalenv` are only assigned in the
        # else-branch below, so this path raises NameError at the final
        # return — the atari/retro path appears unsupported here.
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        # config = tf.ConfigProto(allow_soft_placement=True,
        #                        intra_op_parallelism_threads=1,
        #                        inter_op_parallelism_threads=1)
        # config.gpu_options.allow_growth = True
        # get_session(config=config)
        sess = tf.InteractiveSession()
        # env = VecNormalize(make_vec_env(env_id, env_type, 1, seed, reward_scale=args.reward_scale))

        # NOTE(review): uses args.numenv (not args.num_env as in sibling
        # variants) — confirm the attribute name against the CLI parser.
        env = make_vec_env(env_id,
                           env_type,
                           args.numenv,
                           seed,
                           reward_scale=args.reward_scale)
        evalenv = make_vec_env(env_id,
                               env_type,
                               args.numenv,
                               seed,
                               reward_scale=args.reward_scale)

        # if env_type == 'mujoco':
        #     env = VecNormalize(env)
        #     evalenv = VecNormalizeEval(evalenv)
        #     evalenv.ob_rms = env.ob_rms
        #     evalenv.ret_rms = env.ret_rms

    return env, sess, evalenv
Ejemplo n.º 14
0
def build_env(args, game_name, method_name, tag):
    """Build the environment; deepq runs log under test_logs_{tag}/.

    Args:
        args: parsed CLI namespace (alg, seed, num_env, ...).
        game_name, method_name, tag: components of the deepq logging
            directory ``test_logs_{tag}/{game_name}/{method_name}``.
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            logger_dir_path = 'test_logs_{}/{}/{}'.format(
                tag, game_name, method_name)
            # exist_ok replaces the racy exists()-then-makedirs() check.
            os.makedirs(logger_dir_path, exist_ok=True)
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           logger_dir=logger_dir_path,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 15
0
def build_env(args):
    """Build the vectorized training environment described by ``args``."""
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        # Session is created for its side effect; the handle was only
        # used by since-removed graph-dump debug code.
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 16
0
def build_env(args):
    """Build the environment; deepq gets mode-aware reward clipping.

    Rewards are clipped for normal deepq training, but left unclipped
    when playing (so true scores are visible) and for Space Invaders
    multiplayer (clipped manually elsewhere).
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports hyper-threads; halve to avoid oversubscription.
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    play = args.play
    mode = args.mode
    multiplayer = args.multiplayer
    env_type, env_id = get_env_type(args)
    isSpaceInvaders = "SpaceInvaders" in args.env
    if env_type in {'atari', 'retro'}:
        # this should be the only algorithm I'll use
        if alg == 'deepq':
            # Unclip when playing (to see actual score) or for Space
            # Invaders multiplayer (manual clipping); clip otherwise.
            # The three original branches built identical calls except
            # for this flag, so compute it once.
            clip_rewards = not (play or (multiplayer and isSpaceInvaders))
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True,
                                           'clip_rewards': clip_rewards},
                           env_kwargs={'game_mode': mode})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 17
0
def build_env(args):
    """Construct the (vectorized) environment described by *args*.

    Atari/retro envs get algorithm-specific wrapping (frame stacking for
    deepq, a plain env for trpo_mpi, VecFrameStack otherwise). All other
    env types are built as vec envs with every CLI argument except
    ``seed`` forwarded to the env constructor via ``env_kwargs``.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        # BUG FIX: vars(args) returns the *live* __dict__ of the Namespace,
        # so deleting from it directly would destroy args.seed for every
        # later user of the args object. Copy before mutating.
        args_dict = dict(vars(args))
        # Drop 'seed' so it won't overwrite the seed() method provided by
        # the gym.Env class when forwarded as an env kwarg.
        args_dict.pop("seed", None)
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           env_kwargs=args_dict)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 18
0
def build_env(args):
    """Build the training environment requested on the command line."""
    cpu_count = multiprocessing.cpu_count()
    # macOS reports logical cores; use half of them there.
    if sys.platform == 'darwin':
        cpu_count //= 2
    num_envs = args.num_env or cpu_count  # TODO: removed or ncpu
    print("ncpu = {}".format(cpu_count))
    print("Nenv = {}".format(num_envs))
    algorithm = args.alg
    rng_seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type not in {'atari', 'retro'}:
        print("TF config starting...")
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1)
        tf_config.gpu_options.allow_growth = True
        print("Get session ...")
        get_session(config=tf_config)

        # 'her' works on dict observations, so don't flatten for it.
        print("Starting make_vec_env")
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           rng_seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=(algorithm not in {'her'}))

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
        return env

    # Atari / retro branch: wrapping depends on the algorithm.
    if algorithm == 'deepq':
        return make_env(env_id,
                        env_type,
                        seed=rng_seed,
                        wrapper_kwargs={'frame_stack': True})
    if algorithm == 'trpo_mpi':
        return make_env(env_id, env_type, seed=rng_seed)

    vec = make_vec_env(env_id,
                       env_type,
                       num_envs,
                       rng_seed,
                       gamestate=args.gamestate,
                       reward_scale=args.reward_scale)
    return VecFrameStack(vec, 4)
Ejemplo n.º 19
0
def build_env(args):
    """Build the env for *args*, then apply game-specific reward wrappers.

    For MsPacman/Freeway/Jamesbond the base env is additionally wrapped in
    custom reward shapers from ``super_simple_dqn_wrapper``.
    """
    # Configure the root logger with colored, timestamped DEBUG output.
    logger = logging.getLogger()
    coloredlogs.install(level='DEBUG', fmt='%(asctime)s,%(msecs)03d %(filename)s[%(process)d] %(levelname)s %(message)s')
    logger.setLevel(logging.DEBUG)
    
    ncpu = multiprocessing.cpu_count()
    # macOS reports logical cores; use only half there.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            # deepq consumes a single env with frame stacking done by a wrapper.
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            # Vectorized envs + external 4-frame stacking for the other algs.
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        # TODO: Ensure GPU will be used when sent to SLURM (add as a command-line argument)
        config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)


        # 'her' needs dict observations, so only flatten for other algorithms.
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    # Game-specific reward shaping applied on top of whichever env was built.
    # NOTE(review): these wrappers are also applied on the vec-env path above;
    # presumably only the single-env (deepq) path is used with these games — confirm.
    if env_id == "MsPacmanNoFrameskip-v4":
        env = super_simple_dqn_wrapper.PacmanClearTheBoardRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    elif env_id == "FreewayNoFrameskip-v4":
        env = super_simple_dqn_wrapper.AltFreewayRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FreewayUpRewarded(env)
        # Relies on wrapper attribute forwarding to reach the underlying ALE.
        env.ale.setDifficulty(1)
    elif env_id == "JamesbondNoFrameskip-v4":
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    return env
Ejemplo n.º 20
0
def build_env(args, train=True):
    """Create a vectorized env; a single env is used when evaluating."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    algorithm = args.alg
    rng_seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari'}:
        if algorithm == 'deepq':
            # deepq takes one env with wrapper-level frame stacking.
            return make_env(env_id, env_type, seed=rng_seed,
                            wrapper_kwargs={'frame_stack': True})
        n_envs = (args.num_env or cpu_count) if train else 1
        vec = make_vec_env(env_id, env_type, n_envs, rng_seed,
                           reward_scale=args.reward_scale)
        return VecFrameStack(vec, 4)

    # 'her' needs dict observations, so only flatten for other algorithms.
    n_envs = (args.num_env or 1) if train else 1
    env = make_vec_env(env_id, env_type, n_envs, rng_seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=(algorithm not in {'her'}))

    # if env_type == 'mujoco':
    #     env = VecNormalize(env, use_tf=True)

    return env
Ejemplo n.º 21
0
def play(load_path, env_name, env_kwargs):
    """Load a trained policy from *load_path* and run it forever in *env_name*.

    Episodes are rendered step by step and the episode reward is printed
    when each one ends. The loop only exits via an exception (typically
    KeyboardInterrupt), at which point the env is closed.
    """
    # load the environment
    logger.log("Loading the environment")
    env = make_env(
        env_name,
        env_type="robotics",
        flatten_dict_observations=False,
        env_kwargs=env_kwargs,
    )

    # load the model
    logger.log("Loading the model")
    model = load_policy(env_name=env_name, network="mlp", load_path=load_path)

    # Running the model
    logger.log("Running the loaded model")
    try:
        while True:
            obs = env.reset()
            episode_rew = 0
            done = False
            while not done:
                action, _, _, _ = model.step(obs)
                obs, rew, done, _ = env.step(action)
                episode_rew += rew
                env.render()
                if done:
                    print("episode_rew={}".format(episode_rew))
                    episode_rew = 0
    finally:
        # BUG FIX: env.close() was unreachable after the infinite loop;
        # run cleanup when the loop is interrupted.
        env.close()
Ejemplo n.º 22
0
def build_env(args, cloth_cfg_path=None, render_path=None, start_state_path=None):
    """Daniel: construct the env, using 'vector envs' for parallelism.

    The cloth env follows the generic (non-atari, non-retro) path because
    it needs no special wrapping. `VecFrameStack` is what stacks frames,
    e.g. 4-frame stacking for Atari; without it states would be (84,84,1).

    The non-`args` keyword parameters configure the cloth env.
    """
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    n_envs = args.num_env or cpu_count
    algorithm = args.alg
    rng_seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if algorithm == 'deepq':
            return make_env(env_id, env_type, seed=rng_seed,
                            wrapper_kwargs={'frame_stack': True})
        if algorithm == 'trpo_mpi':
            return make_env(env_id, env_type, seed=rng_seed)
        vec = make_vec_env(env_id, env_type, n_envs, rng_seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(vec, 4)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
    tf_config.gpu_options.allow_growth = True
    get_session(config=tf_config)
    env = make_vec_env(env_id, env_type, args.num_env or 1, rng_seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=(algorithm not in {'her'}),
                       cloth_cfg_path=cloth_cfg_path,
                       render_path=render_path,
                       start_state_path=start_state_path)
    # https://github.com/openai/baselines/issues/938
    if env_type == 'mujoco' and algorithm != 'ddpg':
        env = VecNormalize(env)

    return env
Ejemplo n.º 23
0
 def make_thunk(rank):
     """Return a zero-argument callable that builds the env for subrank *rank*."""
     def _thunk():
         return make_env(env_id=env_id,
                         env_type=env_type,
                         mpi_rank=mpi_rank,
                         subrank=rank,
                         seed=seed,
                         reward_scale=reward_scale,
                         gamestate=gamestate,
                         flatten_dict_observations=flatten_dict_observations,
                         wrapper_kwargs=wrapper_kwargs,
                         logger_dir=logger_dir)
     return _thunk
Ejemplo n.º 24
0
def build_env(args):
    """Build the env for *args*, threading multi-reward options through."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    n_envs = args.num_env or cpu_count
    algorithm = args.alg
    rng_seed = args.seed

    env_type, env_id = get_env_type(args.env)
    if env_type in {'atari', 'retro', 'gym_ple'}:
        if algorithm == 'deepq':
            env = make_env(env_id, env_type, seed=rng_seed,
                           wrapper_kwargs={'frame_stack': True},
                           num_reward=args.num_reward,
                           reward_type=args.reward_type)
            print("env with frame_stack")
        elif algorithm in ['trpo_mpi'] + mr_algs:
            env = make_env(env_id, env_type, seed=rng_seed,
                           num_reward=args.num_reward,
                           reward_type=args.reward_type)
            print("normal env")
        else:
            env = VecFrameStack(
                make_vec_env(env_id, env_type, n_envs, rng_seed,
                             gamestate=args.gamestate,
                             reward_scale=args.reward_scale,
                             num_reward=args.num_reward,
                             reward_type=args.reward_type),
                4)
            print("VecFrameStack env")
    elif algorithm in mr_algs:
        env = make_env(env_id, env_type, seed=rng_seed,
                       num_reward=args.num_reward,
                       reward_type=args.reward_type)
        print("normal env")
    else:
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1)
        tf_config.gpu_options.allow_growth = True
        get_session(config=tf_config)

        # 'her' needs dict observations, so only flatten for other algorithms.
        env = make_vec_env(env_id, env_type, args.num_env or 1, rng_seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=(algorithm not in {'her'}),
                           num_reward=args.num_reward,
                           reward_type=args.reward_type)
        print("make_vec_env")
        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env
Ejemplo n.º 25
0
def main():
    """Evaluate a saved DQN policy on the EVCharging-v1 test set, one
    episode per arrival date, and save per-episode returns and safeties.

    NOTE(review): relies on module-level ``seed`` and ``penalty`` — confirm
    they are defined before this runs.
    """
    returns, safeties = [], []
    # Test-mode env: train=False is passed to the env constructor.
    env = make_env("EVCharging-v1",
                   "safety",
                   seed=seed,
                   wrapper_kwargs={'frame_stack': True},
                   env_kwargs={'train': False})
    # total_timesteps=0 with load_path: presumably loads the saved network
    # without any further training — confirm against deepq.learn docs.
    act = deepq.learn(
        env,
        network=mlp(num_hidden=64, num_layers=3),
        lr=1e-3,
        total_timesteps=0,
        load_path=
        '/home/lihepeng/Documents/Github/tmp/ev/dqn/train/dqn_eta_is_{}.pkl'.
        format(penalty),
    )
    # Skip the first and last dates (incomplete price data at the edges —
    # TODO confirm).
    dates = env.unwrapped._price['date'].unique()[1:-1]
    d = 0
    obs, done = env.reset(**{"arr_date": dates[d]}), False
    while True:
        episode_rew, episode_sft = 0, 0
        while not done:
            # info carries the unclipped reward "r" and safety signal "s".
            obs, rew, done, info = env.step(act(obs[None])[0])
            episode_rew += info["r"]
            episode_sft += info["s"]
        print("Episode reward {}, safety {}".format(episode_rew, episode_sft))
        returns.append(episode_rew)
        safeties.append(episode_sft)
        d += 1
        if d >= dates.size:
            break
        # env.render()
        obs, done = env.reset(**{"arr_date": dates[d]}), False

    print('test returns: {}'.format(np.sum(returns)))
    print('test safeties: {}'.format(np.sum(safeties)))

    # Persist raw per-episode results for later analysis.
    np.save(
        '/home/lihepeng/Documents/Github/tmp/ev/dqn/test/returns_{}'.format(
            penalty), returns)
    np.save(
        '/home/lihepeng/Documents/Github/tmp/ev/dqn/test/safeties_{}'.format(
            penalty), safeties)
Ejemplo n.º 26
0
def build_env(env_id,
              env_type=None,
              num_env=1,
              batch=False,
              seed=None,
              reward_scale=1.0,
              gamestate=None,
              frame_stack=False,
              logger_dir=None):
    """Create either a batched vec env (batch=True) or a single wrapped env."""
    env_type, env_id = get_env_type(env_id, env_type)

    if not batch:
        assert num_env == 1 or num_env is None
        # Single env; frame_stack=True means a 4-frame stack inside make_env.
        env = make_env(env_id,
                       env_type,
                       seed=seed,
                       reward_scale=reward_scale,
                       gamestate=gamestate,
                       wrapper_kwargs={'frame_stack': frame_stack},
                       logger_dir=logger_dir)
    else:
        env = make_vec_env(env_id,
                           env_type,
                           num_env,
                           seed,
                           gamestate=gamestate,
                           reward_scale=reward_scale)
        if frame_stack:
            env = VecFrameStack(env, 4)

    # MuJoCo envs get observation/reward normalization on top.
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=False)
    return env
Ejemplo n.º 27
0
def build_env(args):
    """Build the env for *args*, optionally wrapping it with a learned or
    scripted custom reward (``args.custom_reward``)."""
    ncpu = multiprocessing.cpu_count()
    # macOS reports logical cores; use only half there.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            # deepq consumes one env with wrapper-level frame stacking.
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            # Vectorized envs + external 4-frame stacking otherwise.
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        # Custom reward wrappers only work on vectorized envs.
        assert isinstance(env, VecEnv) or isinstance(env, VecEnvWrapper)

        # SECURITY: eval() on a command-line string executes arbitrary code;
        # consider ast.literal_eval or json.loads for kwargs parsing.
        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'live_long':
            env = W.VecLiveLongReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'random_tf':
            env = W.VecTFRandomReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference':
            env = W.VecTFPreferenceReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference_normalized':
            env = W.VecTFPreferenceRewardNormalized(env,
                                                    **custom_reward_kwargs)
        else:
            assert False, 'no such wrapper exist'

    if env_type == 'mujoco':
        env = VecNormalize(env)

    return env
Ejemplo n.º 28
0
def build_env(args):
    """Build the env for *args* and attach any requested constraints.

    Returns:
        (env, constraints) — the (possibly constraint-wrapped) env and the
        list of constructed constraint objects (empty when none requested).
    """
    ncpu = multiprocessing.cpu_count()
    # macOS reports logical cores; use only half there.
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro', 'envs'}:
        if alg == 'deepq':
            # NOTE(review): mutates args.augmentation in place to tag how
            # augmentation is combined for this algorithm — side channel
            # read elsewhere, presumably; confirm before refactoring.
            if args.augmentation is not None: args.augmentation += '_product'
            env = make_env(env_id,
                           env_type,
                           seed=seed,
                           wrapper_kwargs={
                               'frame_stack': True,
                               'clip_rewards': False
                           },
                           logger_dir=logger.get_dir())
        elif alg == 'trpo_mpi':
            if args.augmentation is not None:
                args.augmentation += '_not_implemented'
            env = make_env(env_id, env_type, seed=seed)
        else:
            if args.augmentation is not None: args.augmentation += '_concat'
            frame_stack_size = 4
            env = make_vec_env(env_id,
                               env_type,
                               nenv,
                               seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)

    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        # 'her' needs dict observations, so only flatten for other algorithms.
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id,
                           env_type,
                           args.num_env or 1,
                           seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    constraints = []
    if args.constraints is not None:
        if not args.is_hard:
            # Soft constraints need one reward-shaping weight per constraint.
            assert args.reward_shaping is not None
            assert len(args.constraints) == len(
                args.reward_shaping)  # should be parallel lists
            reward_shaping = args.reward_shaping
        else:
            # Hard constraints use no shaping weight.
            reward_shaping = [0.] * len(args.constraints)
        constraints = [
            get_constraint(s)(args.is_hard, args.is_dense, r)
            for (s, r) in zip(args.constraints, reward_shaping)
        ]
        # Wrap the env so constraint violations are enforced and logged.
        env = ConstraintStepMonitor(
            ConstraintEnv(env,
                          constraints,
                          augmentation_type=args.augmentation,
                          log_dir=logger.get_dir()), logger.get_dir())

    return env, constraints
Ejemplo n.º 29
0
def build_env(args, extra_args):
    """Build the requested env; special-cases Lock-v0, diabcombolock and maze."""
    if 'Lock-v0' in args.env:
        # Combination lock environment.
        import Environments
        env = gym.make('Lock-v0')
        env.init(env_config={
            'horizon': args.horizon,
            'dimension': args.dimension,
            'switch': 0.1,
            'tabular': False,
        })
        return env
    if 'diabcombolock' in args.env:
        return build_env_homer(horizon=args.horizon, seed=args.seed)
    if 'maze' in args.env:
        import maze
        # Maze size is encoded as the first number in the env name.
        args.maze_size = int(re.findall(r'\d+', args.env)[0])
        return maze.MazeEnv(size=args.maze_size, time=100, holes=0, num_goal=1)

    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    n_envs = args.num_env or cpu_count
    algorithm = args.alg
    rng_seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if algorithm == 'deepq':
            return make_env(env_id, env_type, seed=rng_seed,
                            wrapper_kwargs={'frame_stack': True})
        if algorithm == 'trpo_mpi':
            return make_env(env_id, env_type, seed=rng_seed)
        vec = make_vec_env(env_id, env_type, n_envs, rng_seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(vec, 4)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
    tf_config.gpu_options.allow_growth = True
    get_session(config=tf_config)

    # 'her' needs dict observations, so only flatten for other algorithms.
    env = make_vec_env(env_id, env_type, args.num_env or 1, rng_seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=(algorithm not in {'her'}))
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=True)
    return env
Ejemplo n.º 30
0
def build_highlights_env(args):
    """Build a deepq Atari env and apply the training wrapper selected by
    ``args.training_wrapper`` (if any).

    The original implementation tested args.training_wrapper against 24
    string constants in sequence; since those equality tests are mutually
    exclusive, a single dictionary lookup is behaviorally identical and
    far easier to extend.
    """
    logger = logging.getLogger()
    coloredlogs.install(
        level='DEBUG',
        fmt=
        '%(asctime)s,%(msecs)03d %(filename)s[%(process)d] %(levelname)s %(message)s'
    )
    logger.setLevel(logging.DEBUG)

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type = 'atari'
    env_id = args.env
    # Default alg is dqn, so make initial normal dqn environment
    env = make_env(env_id,
                   env_type,
                   seed=seed,
                   wrapper_kwargs={'frame_stack': True})

    # Wrapper-factory *names* keyed by --training_wrapper value, grouped by
    # game. getattr is deferred to lookup time so only the selected wrapper
    # attribute is ever touched (matching the original if-chain).
    wrapper_names = {
        # pacman
        'pacman_fear_only': 'fear_only',
        'pacman_power_pill_only': 'pacman_power_pill_only',
        'pacman_normal_pill_only': 'pacman_normal_pill_only',
        'pacman_normal_pill_power_pill_only': 'pacman_normal_pill_power_pill_only',
        'pacman_normal_pill_fear_only': 'pacman_normal_pill_fear_only',
        'pacman_normal_pill_in_game': 'pacman_normal_pill_in_game',
        'pacman_power_pill_fear_only': 'pacman_power_pill_fear_only',
        'pacman_power_pill_in_game': 'pacman_power_pill_in_game',
        'pacman_fear_in_game': 'pacman_fear_in_game',
        # freeway (also specifies the environment)
        'freeway_up_only': 'freeway_up_only',
        'freeway_down_only': 'freeway_down_only',
        'freeway_up_down': 'freeway_up_down',
        # asterix (also specifies the environment)
        'asterix_fear_only': 'fear_only',
        'asterix_bonus_life_in_game': 'asterix_bonus_life_in_game',
        'asterix_fear_in_game': 'asterix_fear_in_game',
        # alien (also specifies the environment)
        'alien_fear_only': 'fear_only',
        'alien_pulsar_only': 'alien_pulsar_only',
        'alien_eggs_only': 'alien_eggs_only',
        'alien_eggs_pulsar_only': 'alien_eggs_pulsar_only',
        'alien_eggs_fear_only': 'alien_eggs_fear_only',
        'alien_eggs_in_game': 'alien_eggs_in_game',
        'alien_pulsar_fear_only': 'alien_pulsar_fear_only',
        'alien_pulsar_in_game': 'alien_pulsar_in_game',
        'alien_fear_in_game': 'alien_fear_in_game',
    }
    name = wrapper_names.get(args.training_wrapper)
    if name is not None:
        env = getattr(super_simple_dqn_wrapper, name)(env)
    return env
Ejemplo n.º 31
0
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from scipy import stats

from forkan.models import VAE

from baselines.common.cmd_util import make_env

env_id = 'Pendulum-v0'
env_type = 'classic_control'

env = make_env(env_id, env_type, vae_pend=True)

env.reset()

v = VAE(load_from='pend-optimal', network='pendulum')

t = 0
idx = 2

ths = []
zss = []

thds = []
zdots = []

old_z = 0