def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        if args.env_json:
            with open(args.env_json) as f:
                # keys must correspond to the env.__init__ arguments
                env_kwargs = json.load(f)
            env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                               env_kwargs=env_kwargs,
                               reward_scale=args.reward_scale,
                               flatten_dict_observations=flatten_dict_observations)
        else:
            env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                               reward_scale=args.reward_scale,
                               flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

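# Hedged illustration (not from the original source): the file passed via
# args.env_json above is expected to hold a JSON object whose keys map
# one-to-one onto the target environment's __init__ keyword arguments. For a
# hypothetical env defined as `def __init__(self, max_steps=200, friction=0.5)`,
# the file could contain:
#
#   {"max_steps": 500, "friction": 0.8}
#
# json.load then yields a dict that make_vec_env forwards as env_kwargs.
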
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

            # Sgillen: added Guillaume's fix for the VecNormalize loading
            # ==================================================================
            if 'load_path' in args:
                sess = get_session()
                loaded_params = joblib.load(osp.expanduser(args.load_path))
                # Assign every saved trainable variable; with a TF-backed
                # VecNormalize this includes the running mean/std statistics.
                restores = []
                for v in tf.trainable_variables():
                    restores.append(v.assign(loaded_params[v.name]))
                sess.run(restores)
                # Refresh the Python-side cached mean/var/count so the wrapper
                # actually uses the restored statistics.
                env.ob_rms._set_mean_var_count()
                env.ret_rms._set_mean_var_count()
                # print(dir(env.ret_rms))

    return env

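# A minimal save-side sketch, assuming the checkpoint format that the loading
# fix above expects: a dict mapping TF variable names to values, serialized
# with joblib (this mirrors what baselines' save_variables does; the helper
# name here is illustrative, not from the source).
import joblib
import tensorflow as tf

def save_trainable_variables(sess, save_path):
    variables = tf.trainable_variables()
    values = sess.run(variables)
    # Key by v.name so the restore loop can look each variable up again.
    joblib.dump({v.name: val for v, val in zip(variables, values)}, save_path)
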
def build_env(num_env, alg, seed, env_type, env_id, reward_scale, gamestate=None):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = num_env or ncpu

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=gamestate,
                               reward_scale=reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        # NOTE: this config is constructed but never passed to a session here,
        # so it has no effect as written.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, num_env or 1, seed,
                           reward_scale=reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1,
                                gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.20))
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        normalize_value = args.normalize_value
        if (env_type == 'mujoco' or env_type == 'roboschool') and normalize_value:
            env = VecNormalize(env)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type == 'threepass':
        env = make_m_three_pass_env(env_id, env_type, nenv, seed, args)
    elif env_type == 'pass':
        env = make_multi_pass_env(env_id, env_type, nenv, seed, args)
    elif env_type == 'x_pass':
        env = make_x_pass_env(env_id, env_type, nenv, seed, args)
    elif env_type == 'leftward':
        env = make_m_leftward_env(env_id, env_type, nenv, seed, args)
    elif env_type == 'island':
        env = make_m_island_env(env_id, env_type, nenv, seed, args)
    elif env_type == 'x_island':
        env = make_m_x_island_env(env_id, env_type, nenv, seed, args)
    elif env_type == 'pushball':
        env = make_m_pushball_env(env_id, env_type, nenv, seed, args)
    elif env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env

def build_env(args, normalize_ob=True, is_eval=False):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    # nenv = num_env or ncpu
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            # env = make_vec_env(env_id, env_type, 1, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        # gpu_options = tf.GPUOptions(allow_growth=True)
        # config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options,
        #                         intra_op_parallelism_threads=1,
        #                         inter_op_parallelism_threads=1)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            logger.log('build_env: normalize_ob', normalize_ob)
            # sys.exit()
            env = VecNormalize(env, ob=normalize_ob, is_training=not is_eval, use_tf=True)

    return env

def build_env(args, extra_args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                              one_day=extra_args["one_day"],
                                              energy_in_state=extra_args["energy_in_state"])
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                              one_day=extra_args["one_day"],
                                              energy_in_state=extra_args["energy_in_state"])
        except KeyError as e:
            raise KeyError("You didn't specify {} as an argument. Please do, or change the code.".format(e.args[0]))

        # wrap it
        # timestamp = datetime.now().strftime('_%m_%d_%Y_%H_%M')
        # log_file = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "logs", timestamp)
        logger_dir = logger.get_dir()
        # mpi_rank and subrank hard-coded to 0
        env = Monitor(env, logger_dir and os.path.join(logger_dir, "0.0"), allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_env(args, silent_monitor, prio_args=None):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale,
                               prio_args=prio_args,
                               silent_monitor=silent_monitor)
            if prio_args is None:
                env = VecFrameStack(env, frame_stack_size)
            else:
                env = PrioVecFrameStack(env, frame_stack_size)  # TODO prio vec frame stack
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        num_env = args.n_active_envs if prio_args is None else args.num_env
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           prio_args=prio_args,
                           silent_monitor=silent_monitor)

        if env_type == 'mujoco':
            if prio_args is None:
                env = VecNormalize(env)
            else:
                env = PrioVecNormalize(env)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    print(env_id)

    # extract the agc_env_name
    noskip_idx = env_id.find("NoFrameskip")
    env_name = env_id[:noskip_idx].lower()
    print("Env Name for Masking:", env_name)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env, VecEnv) or isinstance(env, VecEnvWrapper)

        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'pytorch':
            if args.custom_reward_path == '':
                assert False, 'no path for reward model'
            else:
                env = W.VecPyTorchAtariReward(env, args.custom_reward_path, env_name)
        else:
            assert False, 'no such wrapper exists'

    if env_type == 'mujoco':
        env = VecNormalize(env)
    # if env_type == 'atari':
    #     input("Normalizing for Atari game: okay? [Enter]")
    #     # normalize rewards but not observations for atari
    #     env = VecNormalizeRewards(env)

    return env

def build_env(args, seed):
    nenv = 1
    alg = args.alg
    # seed = args.seed
    seed = int(np.random.rand(1) * 101000)
    print(seed)

    env_type, env_id = get_env_type(args.env)
    set_global_seeds(seed)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        # config = tf.ConfigProto(allow_soft_placement=True,
        #                         intra_op_parallelism_threads=1,
        #                         inter_op_parallelism_threads=1)
        # config.gpu_options.allow_growth = True
        # get_session(config=config)
        sess = tf.InteractiveSession()

        # env = VecNormalize(make_vec_env(env_id, env_type, 1, seed, reward_scale=args.reward_scale))
        env = make_vec_env(env_id, env_type, args.numenv, seed, reward_scale=args.reward_scale)
        evalenv = make_vec_env(env_id, env_type, args.numenv, seed, reward_scale=args.reward_scale)
        # if env_type == 'mujoco':
        #     env = VecNormalize(env)
        #     evalenv = VecNormalizeEval(evalenv)
        #     evalenv.ob_rms = env.ob_rms
        #     evalenv.ret_rms = env.ret_rms

    # NOTE: sess and evalenv are only assigned on the non-atari path; the
    # atari/retro branch would fail at this return as written.
    return env, sess, evalenv

def build_env(args, game_name, method_name, tag):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            logger_dir_path = 'test_logs_{}/{}/{}'.format(tag, game_name, method_name)
            logger_dir_exist = os.path.exists(logger_dir_path)
            if not logger_dir_exist:
                os.makedirs(logger_dir_path)
            env = make_env(env_id, env_type, seed=seed,
                           logger_dir=logger_dir_path,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        sess = get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

        ## Failing to write structure---is it defined in another process?
        # print('writing session graph--I HOPE')
        # print(sess.graph)
        # outfile = osp.join(args.storspot, 'tf') if args.storspot else './tf'
        # file_writer = tf.summary.FileWriter(outfile, sess.graph)
        # summary_op = tf.summary.merge_all()

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    play = args.play
    mode = args.mode
    multiplayer = args.multiplayer

    env_type, env_id = get_env_type(args)

    isSpaceInvaders = False
    if "SpaceInvaders" in args.env:
        isSpaceInvaders = True

    if env_type in {'atari', 'retro'}:
        # this should be the only algorithm I'll use
        if alg == 'deepq':
            # BEGIN MY CODE
            # Clip rewards when training, but not when playing, so the actual
            # score is visible. Also pass the game mode in as an environment
            # parameter.
            if play:
                # playing to see how well the network scores, so unclip rewards
                env = make_env(env_id, env_type, seed=seed,
                               wrapper_kwargs={'frame_stack': True, 'clip_rewards': False},
                               env_kwargs={'game_mode': mode})
            else:
                if multiplayer and isSpaceInvaders:
                    # unclip rewards for Space Invaders multiplayer; clipping is done manually
                    env = make_env(env_id, env_type, seed=seed,
                                   wrapper_kwargs={'frame_stack': True, 'clip_rewards': False},
                                   env_kwargs={'game_mode': mode})
                else:
                    # otherwise, keep the clipped reward used by the base algorithm
                    env = make_env(env_id, env_type, seed=seed,
                                   wrapper_kwargs={'frame_stack': True, 'clip_rewards': True},
                                   env_kwargs={'game_mode': mode})
            # END MY CODE
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        args_dict = vars(args)
        # drop 'seed' so it won't shadow the seed() method provided by the gym.Env class
        del args_dict["seed"]
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           env_kwargs=args_dict)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu  # TODO: removed or ncpu
    print("ncpu = {}".format(ncpu))
    print("Nenv = {}".format(nenv))
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        print("TF config starting...")
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        print("Get session ...")
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        print("Starting make_vec_env")
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_env(args):
    logger = logging.getLogger()
    coloredlogs.install(level='DEBUG',
                        fmt='%(asctime)s,%(msecs)03d %(filename)s[%(process)d] %(levelname)s %(message)s')
    logger.setLevel(logging.DEBUG)

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        # TODO: Ensure it will use the GPU when sent to SLURM (add as a command-line argument)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    if env_id == "MsPacmanNoFrameskip-v4":
        env = super_simple_dqn_wrapper.PacmanClearTheBoardRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    elif env_id == "FreewayNoFrameskip-v4":
        env = super_simple_dqn_wrapper.AltFreewayRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FreewayUpRewarded(env)
        env.ale.setDifficulty(1)
    elif env_id == "JamesbondNoFrameskip-v4":
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)

    return env

def build_env(args, train=True):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        else:
            frame_stack_size = 4
            if train:
                env = make_vec_env(env_id, env_type, args.num_env or ncpu, seed,
                                   reward_scale=args.reward_scale)
            else:
                env = make_vec_env(env_id, env_type, 1, seed,
                                   reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        flatten_dict_observations = alg not in {'her'}
        if train:
            env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                               reward_scale=args.reward_scale,
                               flatten_dict_observations=flatten_dict_observations)
        else:
            env = make_vec_env(env_id, env_type, 1, seed,
                               reward_scale=args.reward_scale,
                               flatten_dict_observations=flatten_dict_observations)
        # if env_type == 'mujoco':
        #     env = VecNormalize(env, use_tf=True)

    return env

def play(load_path, env_name, env_kwargs):
    # load the environment
    logger.log("Loading the environment")
    env = make_env(
        env_name,
        env_type="robotics",
        flatten_dict_observations=False,
        env_kwargs=env_kwargs,
    )

    # load the model
    logger.log("Loading the model")
    model = load_policy(env_name=env_name, network="mlp", load_path=load_path)

    # Running the model
    logger.log("Running the loaded model")
    while True:
        obs = env.reset()
        episode_rew = 0
        done = False
        while not done:
            action, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
            env.render()
        if done:
            print("episode_rew={}".format(episode_rew))
            episode_rew = 0
    env.close()

def build_env(args, cloth_cfg_path=None, render_path=None, start_state_path=None):
    """Daniel: actually construct the env, using 'vector envs' for parallelism.

    For now our cloth env can follow the non-atari and non-retro stuff, because
    I don't think we need a similar kind of 'wrapping' that they do. Note that
    `VecFrameStack` is needed to stack frames, e.g., in Atari we do 4 frame
    stacking. Without that, the states would be size (84,84,1).

    The non-`args` parameters here are for the cloth env.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           cloth_cfg_path=cloth_cfg_path,
                           render_path=render_path,
                           start_state_path=start_state_path)

        # https://github.com/openai/baselines/issues/938
        if env_type == 'mujoco' and alg != 'ddpg':
            env = VecNormalize(env)

    return env

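# A hedged sketch (not part of the original code) of the frame-stacking note in
# the docstring above: a single preprocessed Atari observation is (84, 84, 1),
# and wrapping the vec env with VecFrameStack(env, 4) widens it to (84, 84, 4).
# Assumes baselines is installed and 'PongNoFrameskip-v4' is registered.
from baselines.common.cmd_util import make_vec_env
from baselines.common.vec_env.vec_frame_stack import VecFrameStack

venv = make_vec_env('PongNoFrameskip-v4', 'atari', 1, seed=0)
print(venv.observation_space.shape)   # expected: (84, 84, 1)
venv = VecFrameStack(venv, 4)
print(venv.observation_space.shape)   # expected: (84, 84, 4)
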
def make_thunk(rank):
    return lambda: make_env(
        env_id=env_id,
        env_type=env_type,
        mpi_rank=mpi_rank,
        subrank=rank,
        seed=seed,
        reward_scale=reward_scale,
        gamestate=gamestate,
        flatten_dict_observations=flatten_dict_observations,
        wrapper_kwargs=wrapper_kwargs,
        logger_dir=logger_dir
    )

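# Hedged usage sketch for the thunk above: baselines' make_vec_env builds one
# zero-argument callable per rank and hands the list to SubprocVecEnv (or
# DummyVecEnv for a single env). Binding `rank` through make_thunk's argument
# avoids the late-binding pitfall of writing `lambda: make_env(..., subrank=i)`
# directly inside a loop. `num_env` here is an assumed surrounding variable.
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv

if num_env > 1:
    venv = SubprocVecEnv([make_thunk(i) for i in range(num_env)])
else:
    venv = DummyVecEnv([make_thunk(0)])
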
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro', 'gym_ple'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True},
                           num_reward=args.num_reward,
                           reward_type=args.reward_type)
            print("env with frame_stack")
        elif alg in ['trpo_mpi'] + mr_algs:
            env = make_env(env_id, env_type, seed=seed,
                           num_reward=args.num_reward,
                           reward_type=args.reward_type)
            print("normal env")
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale,
                               num_reward=args.num_reward,
                               reward_type=args.reward_type)
            env = VecFrameStack(env, frame_stack_size)
            print("VecFrameStack env")
    elif alg in mr_algs:
        env = make_env(env_id, env_type, seed=seed,
                       num_reward=args.num_reward,
                       reward_type=args.reward_type)
        print("normal env")
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations,
                           num_reward=args.num_reward,
                           reward_type=args.reward_type)
        print("make_vec_env")

        if env_type == 'mujoco':
            env = VecNormalize(env)

    return env

def main():
    returns, safeties = [], []

    env = make_env("EVCharging-v1", "safety", seed=seed,
                   wrapper_kwargs={'frame_stack': True},
                   env_kwargs={'train': False})
    act = deepq.learn(
        env,
        network=mlp(num_hidden=64, num_layers=3),
        lr=1e-3,
        total_timesteps=0,
        load_path='/home/lihepeng/Documents/Github/tmp/ev/dqn/train/dqn_eta_is_{}.pkl'.format(penalty),
    )

    dates = env.unwrapped._price['date'].unique()[1:-1]
    d = 0
    obs, done = env.reset(**{"arr_date": dates[d]}), False
    while True:
        episode_rew, episode_sft = 0, 0
        while not done:
            obs, rew, done, info = env.step(act(obs[None])[0])
            episode_rew += info["r"]
            episode_sft += info["s"]
        print("Episode reward {}, safety {}".format(episode_rew, episode_sft))
        returns.append(episode_rew)
        safeties.append(episode_sft)
        d += 1
        if d >= dates.size:
            break
        # env.render()
        obs, done = env.reset(**{"arr_date": dates[d]}), False

    print('test returns: {}'.format(np.sum(returns)))
    print('test safeties: {}'.format(np.sum(safeties)))
    np.save('/home/lihepeng/Documents/Github/tmp/ev/dqn/test/returns_{}'.format(penalty), returns)
    np.save('/home/lihepeng/Documents/Github/tmp/ev/dqn/test/safeties_{}'.format(penalty), safeties)

def build_env(env_id, env_type=None, num_env=1, batch=False, seed=None,
              reward_scale=1.0, gamestate=None, frame_stack=False, logger_dir=None):
    # ncpu = multiprocessing.cpu_count()
    env_type, env_id = get_env_type(env_id, env_type)
    if batch:
        env = make_vec_env(env_id, env_type, num_env, seed,
                           gamestate=gamestate, reward_scale=reward_scale)
        if frame_stack:
            frame_stack_size = 4
            env = VecFrameStack(env, frame_stack_size)
    else:
        assert num_env == 1 or num_env is None
        # assuming stack of 4 if frame_stack is true
        env = make_env(env_id, env_type, seed=seed,
                       reward_scale=reward_scale,
                       gamestate=gamestate,
                       wrapper_kwargs={'frame_stack': frame_stack},
                       logger_dir=logger_dir)
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=False)
    return env

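# Hypothetical usage of the helper above (env ids and flags are examples, not
# from the source): a batched, frame-stacked Atari env versus a single
# monitored env.
atari_env = build_env('PongNoFrameskip-v4', num_env=8, batch=True,
                      seed=0, frame_stack=True)
single_env = build_env('CartPole-v1', seed=0, logger_dir='/tmp/logs')
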
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env, VecEnv) or isinstance(env, VecEnvWrapper)

        custom_reward_kwargs = eval(args.custom_reward_kwargs)

        if args.custom_reward == 'live_long':
            env = W.VecLiveLongReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'random_tf':
            env = W.VecTFRandomReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference':
            env = W.VecTFPreferenceReward(env, **custom_reward_kwargs)
        elif args.custom_reward == 'preference_normalized':
            env = W.VecTFPreferenceRewardNormalized(env, **custom_reward_kwargs)
        else:
            assert False, 'no such wrapper exists'

    if env_type == 'mujoco':
        env = VecNormalize(env)

    return env

def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro', 'envs'}:
        if alg == 'deepq':
            if args.augmentation is not None:
                args.augmentation += '_product'
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True, 'clip_rewards': False},
                           logger_dir=logger.get_dir())
        elif alg == 'trpo_mpi':
            if args.augmentation is not None:
                args.augmentation += '_not_implemented'
            env = make_env(env_id, env_type, seed=seed)
        else:
            if args.augmentation is not None:
                args.augmentation += '_concat'
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    constraints = []
    if args.constraints is not None:
        if not args.is_hard:
            assert args.reward_shaping is not None
            # constraints and reward_shaping should be parallel lists
            assert len(args.constraints) == len(args.reward_shaping)
            reward_shaping = args.reward_shaping
        else:
            reward_shaping = [0.] * len(args.constraints)
        constraints = [get_constraint(s)(args.is_hard, args.is_dense, r)
                       for (s, r) in zip(args.constraints, reward_shaping)]

    env = ConstraintStepMonitor(
        ConstraintEnv(env, constraints,
                      augmentation_type=args.augmentation,
                      log_dir=logger.get_dir()),
        logger.get_dir())

    return env, constraints

def build_env(args, extra_args):
    if 'Lock-v0' in args.env:
        # Build combination lock environment
        import Environments
        env = gym.make('Lock-v0')
        ep_dict = {'horizon': args.horizon,
                   'dimension': args.dimension,
                   'switch': 0.1,
                   'tabular': False}
        env.init(env_config=ep_dict)
        return env
    elif 'diabcombolock' in args.env:
        return build_env_homer(horizon=args.horizon, seed=args.seed)
    elif 'maze' in args.env:
        import maze
        args.maze_size = int(re.findall(r'\d+', args.env)[0])
        env = maze.MazeEnv(size=args.maze_size, time=100, holes=0, num_goal=1)
        return env

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env

def build_highlights_env(args):
    logger = logging.getLogger()
    coloredlogs.install(level='DEBUG',
                        fmt='%(asctime)s,%(msecs)03d %(filename)s[%(process)d] %(levelname)s %(message)s')
    logger.setLevel(logging.DEBUG)

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    env_type = 'atari'
    env_id = args.env

    # Default alg is dqn, so make the initial, normal DQN environment
    env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})

    # logger.info("About to check for training wrapper")
    # Now switch on the training-based args to add wrappers as needed
    if args.training_wrapper == 'pacman_fear_only':
        env = super_simple_dqn_wrapper.fear_only(env)
        # logger.info("Training wrapper: " + str(args.training_wrapper))
    if args.training_wrapper == 'pacman_power_pill_only':
        env = super_simple_dqn_wrapper.pacman_power_pill_only(env)
        # logger.info("Training wrapper: " + str(args.training_wrapper))
    if args.training_wrapper == 'pacman_normal_pill_only':
        env = super_simple_dqn_wrapper.pacman_normal_pill_only(env)
    if args.training_wrapper == 'pacman_normal_pill_power_pill_only':
        env = super_simple_dqn_wrapper.pacman_normal_pill_power_pill_only(env)
    if args.training_wrapper == 'pacman_normal_pill_fear_only':
        env = super_simple_dqn_wrapper.pacman_normal_pill_fear_only(env)
    if args.training_wrapper == 'pacman_normal_pill_in_game':
        env = super_simple_dqn_wrapper.pacman_normal_pill_in_game(env)
    if args.training_wrapper == 'pacman_power_pill_fear_only':
        env = super_simple_dqn_wrapper.pacman_power_pill_fear_only(env)
    if args.training_wrapper == 'pacman_power_pill_in_game':
        env = super_simple_dqn_wrapper.pacman_power_pill_in_game(env)
    if args.training_wrapper == 'pacman_fear_in_game':
        env = super_simple_dqn_wrapper.pacman_fear_in_game(env)

    # training options for freeway (also specifies the environment)
    if args.training_wrapper == 'freeway_up_only':
        env = super_simple_dqn_wrapper.freeway_up_only(env)
    if args.training_wrapper == 'freeway_down_only':
        env = super_simple_dqn_wrapper.freeway_down_only(env)
    if args.training_wrapper == 'freeway_up_down':
        env = super_simple_dqn_wrapper.freeway_up_down(env)

    # training options for asterix (also specifies the environment)
    if args.training_wrapper == 'asterix_fear_only':
        env = super_simple_dqn_wrapper.fear_only(env)
    if args.training_wrapper == 'asterix_bonus_life_in_game':
        env = super_simple_dqn_wrapper.asterix_bonus_life_in_game(env)
    if args.training_wrapper == 'asterix_fear_in_game':
        env = super_simple_dqn_wrapper.asterix_fear_in_game(env)

    # training options for alien (also specifies the environment)
    if args.training_wrapper == 'alien_fear_only':
        env = super_simple_dqn_wrapper.fear_only(env)
    if args.training_wrapper == 'alien_pulsar_only':
        env = super_simple_dqn_wrapper.alien_pulsar_only(env)
    if args.training_wrapper == 'alien_eggs_only':
        env = super_simple_dqn_wrapper.alien_eggs_only(env)
    if args.training_wrapper == 'alien_eggs_pulsar_only':
        env = super_simple_dqn_wrapper.alien_eggs_pulsar_only(env)
    if args.training_wrapper == 'alien_eggs_fear_only':
        env = super_simple_dqn_wrapper.alien_eggs_fear_only(env)
    if args.training_wrapper == 'alien_eggs_in_game':
        env = super_simple_dqn_wrapper.alien_eggs_in_game(env)
    if args.training_wrapper == 'alien_pulsar_fear_only':
        env = super_simple_dqn_wrapper.alien_pulsar_fear_only(env)
    if args.training_wrapper == 'alien_pulsar_in_game':
        env = super_simple_dqn_wrapper.alien_pulsar_in_game(env)
    if args.training_wrapper == 'alien_fear_in_game':
        env = super_simple_dqn_wrapper.alien_fear_in_game(env)

    return env

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from scipy import stats

from forkan.models import VAE
from baselines.common.cmd_util import make_env

env_id = 'Pendulum-v0'
env_type = 'classic_control'
env = make_env(env_id, env_type, vae_pend=True)
env.reset()

v = VAE(load_from='pend-optimal', network='pendulum')

t = 0
idx = 2

ths = []
zss = []
thds = []
zdots = []
old_z = 0