def build_env(args):
    """Build the vectorized training environment selected by *args*.

    Dispatches on the type returned by ``get_env_type``: the project's
    custom multi-agent families each have a dedicated maker, atari/retro
    environments get frame stacking, and everything else goes through
    ``make_vec_env`` (with VecNormalize for mujoco).
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # macOS counts hyperthreads; halve for a physical-core estimate.
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    # Custom environment families: one maker per type, identical signature.
    custom_makers = {
        'threepass': make_m_three_pass_env,
        'pass': make_multi_pass_env,
        'x_pass': make_x_pass_env,
        'leftward': make_m_leftward_env,
        'island': make_m_island_env,
        'x_island': make_m_x_island_env,
        'pushball': make_m_pushball_env,
    }
    if env_type in custom_makers:
        env = custom_makers[env_type](env_id, env_type, nenv, seed, args)
    elif env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        # Single-threaded TF session keeps per-worker CPU usage predictable.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
    return env
def build_env(num_env, alg, seed, env_type, env_id, reward_scale, gamestate=None):
    """Build a (vectorized) environment from explicit parameters.

    Parameters mirror the usual ``args`` fields: number of parallel envs,
    algorithm name, RNG seed, environment type/id, reward scaling, and an
    optional retro gamestate.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        # macOS counts hyperthreads; halve for a physical-core estimate.
        ncpu //= 2
    nenv = num_env or ncpu
    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=gamestate, reward_scale=reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        # BUG FIX: the session config above was constructed but never
        # applied — without this call the default session config is used
        # and allow_growth has no effect (sibling variants all call it).
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, num_env or 1, seed,
                           reward_scale=reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
def build_env(args):
    """Build the training environment for *args*, with optional observation
    normalization for mujoco/roboschool controlled by ``args.normalize_value``.
    """
    n_cpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        n_cpu //= 2  # halve on macOS, which reports hyperthreads
    n_envs = args.num_env or n_cpu
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            stack_size = 4
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, stack_size)
        return env

    # Cap the process at 20% of GPU memory, and let it grow lazily inside
    # that cap; single-threaded op parallelism for reproducible CPU load.
    config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.20))
    config.gpu_options.allow_growth = True
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    normalize_value = args.normalize_value
    if (env_type == 'mujoco' or env_type == 'roboschool') and normalize_value:
        env = VecNormalize(env)
    return env
def build_env(args):
    """Construct the environment described by *args* (atari/retro get frame
    stacking; everything else is vectorized, with VecNormalize for mujoco)."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        if alg == 'trpo_mpi':
            return make_env(env_id, env_type, seed=seed)
        env = make_vec_env(env_id, env_type, n_envs, seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale)
    if env_type == 'mujoco':
        env = VecNormalize(env)
    return env
def build_env(args, extra_args):
    """Build an atari environment, choosing the wrapping strategy per
    algorithm; ``extra_args['weights']`` (if present) is forwarded to
    ``make_vec_env`` in the default branch."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    # MPI rank distinguishes per-worker monitor logs; 0 when MPI is absent.
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, n_envs, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id, None)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id, None)
            env.seed(seed)
            env = bench.Monitor(
                env,
                logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            stack_size = 4
            weights = extra_args['weights'] if 'weights' in extra_args else None
            env = VecFrameStack(
                make_vec_env(env_id, env_type, n_envs, seed, weights=weights),
                stack_size)
    return env
def build_env(args):
    """Build the environment for *args*; when ``args.load_path`` is present,
    also restore trainable variables and the VecNormalize running statistics
    from the checkpoint (Guillaume's fix for VecNormalize loading).
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2  # macOS reports hyperthreads
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)
    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env)
            # Sgillen: added guillaume's fix for the vec normalize loading
            # ==============================================================
            # IDIOM FIX: use the `in` operator instead of calling
            # args.__contains__ directly (identical behavior).
            if 'load_path' in args:
                sess = get_session()
                loaded_params = joblib.load(osp.expanduser(args.load_path))
                # Assign every checkpointed value onto its trainable variable.
                restores = []
                for v in tf.trainable_variables():
                    restores.append(v.assign(loaded_params[v.name]))
                sess.run(restores)
                # Refresh the normalizers' cached mean/var/count after load.
                env.ob_rms._set_mean_var_count()
                env.ret_rms._set_mean_var_count()
    return env
def build_env(args):
    """Build the environment for *args*: per-algorithm atari wrapping, a
    dedicated retro path, and a generic vectorized path otherwise."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    # Per-worker monitor directories are keyed by MPI rank (0 without MPI).
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, n_envs, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(
                env,
                logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            env = VecFrameStack(make_vec_env(env_id, env_type, n_envs, seed), 4)
    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=args.env,
            state=gamestate,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)
    else:
        get_session(
            tf.ConfigProto(allow_soft_placement=True,
                           intra_op_parallelism_threads=1,
                           inter_op_parallelism_threads=1))
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)
        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
def build_env(args, train=True):
    """Build a vectorized environment; when ``train`` is False a single env
    is created (for evaluation) regardless of ``args.num_env``."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari'}:
        if alg == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        n_envs = (args.num_env or cpu_count) if train else 1
        env = make_vec_env(env_id, env_type, n_envs, seed,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    flatten_dict_observations = alg not in {'her'}
    n_envs = (args.num_env or 1) if train else 1
    env = make_vec_env(env_id, env_type, n_envs, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    # if env_type == 'mujoco':
    #     env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Construct the environment for *args*: atari/retro with frame
    stacking, otherwise a vectorized env (normalized for mujoco)."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        if alg == 'trpo_mpi':
            return make_env(env_id, env_type, seed=seed)
        env = make_vec_env(env_id, env_type, n_envs, seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    if env_type == 'mujoco':
        env = VecNormalize(env)
    return env
def build_env(args):
    """Build the environment for *args*; this variant configures no TF
    session and relies on the caller's defaults."""
    cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cores //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cores
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
        return env

    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    if env_type == 'mujoco':
        env = VecNormalize(env)
    return env
def build_env(args):
    """Build the environment for *args*; when ``args.env_json`` is set, the
    JSON file's contents are forwarded as the env constructor's kwargs."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
        return env

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    if args.env_json:
        with open(args.env_json) as f:
            # need to corresponding to env.__init__ arguments
            env_kwargs = json.load(f)
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           env_kwargs=env_kwargs,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
    else:
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=True)
    return env
def build_env(args, silent_monitor, prio_args=None):
    """Build the environment for *args*; when ``prio_args`` is given the
    prioritized variants of the frame-stack / normalize wrappers are used."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            stack_size = 4
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale,
                               prio_args=prio_args,
                               silent_monitor=silent_monitor)
            if prio_args is None:
                env = VecFrameStack(env, stack_size)
            else:
                # TODO prio vec frame stack
                env = PrioVecFrameStack(env, stack_size)
        return env

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    # Non-prioritized runs only keep n_active_envs environments live.
    num_env = args.n_active_envs if prio_args is None else args.num_env
    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations,
                       prio_args=prio_args,
                       silent_monitor=silent_monitor)
    if env_type == 'mujoco':
        env = VecNormalize(env) if prio_args is None else PrioVecNormalize(env)
    return env
def build_env(args, normalize_ob=True, is_eval=False):
    """Build the environment for *args*.

    ``normalize_ob`` toggles observation normalization for mujoco;
    ``is_eval`` freezes the normalizer statistics during evaluation.
    """
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
        return env

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    if env_type == 'mujoco':
        logger.log('build_env: normalize_ob', normalize_ob)
        env = VecNormalize(env, ob=normalize_ob,
                           is_training=not is_eval, use_tf=True)
    return env
def build_env(args, extra_args):
    """Build the environment for *args*, including the project's 'custom'
    behavioral-simulation envs configured through ``extra_args``.

    Raises:
        KeyError: if a required key (``step_size``, ``action_space``,
            ``one_day``, ``energy_in_state``) is missing from *extra_args*.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2  # macOS reports hyperthreads
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            # All three simulator constructors take the same kwargs.
            sim_kwargs = dict(
                action_space_string=extra_args["action_space"],
                one_day=extra_args["one_day"],
                energy_in_state=extra_args["energy_in_state"])
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(**sim_kwargs)
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(**sim_kwargs)
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(**sim_kwargs)
        except KeyError as e:
            # BUG FIX: the original raise contained an unterminated string
            # literal (a raw newline inside the message), which is a syntax
            # error; rebuilt as one chained, well-formed message.
            raise KeyError("You didn't specify {} as an argument. "
                           "Please do, or change the code.".format(e.args[0])) from e
        # Wrap for monitoring; mpi_rank and subrank are hard coded to 0.
        logger_dir = logger.get_dir()
        env = Monitor(env, logger_dir and os.path.join(logger_dir, "0.0"),
                      allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Build the environment for *args*, optionally wrapping it with a
    learned custom reward model (``args.custom_reward``)."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args.env)
    print(env_id)

    # extract the agc_env_name: the part of the id before "NoFrameskip".
    noskip_idx = env_id.find("NoFrameskip")
    env_name = env_id[:noskip_idx].lower()
    print("Env Name for Masking:", env_name)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)

    if args.custom_reward != '':
        from baselines.common.vec_env import VecEnv, VecEnvWrapper
        import baselines.common.custom_reward_wrapper as W
        assert isinstance(env, VecEnv) or isinstance(env, VecEnvWrapper)
        # SECURITY NOTE: eval() on a command-line string executes arbitrary
        # code — acceptable only for trusted local use.
        custom_reward_kwargs = eval(args.custom_reward_kwargs)
        if args.custom_reward == 'pytorch':
            if args.custom_reward_path == '':
                assert False, 'no path for reward model'
            else:
                env = W.VecPyTorchAtariReward(env, args.custom_reward_path,
                                              env_name)
        else:
            assert False, 'no such wrapper exist'

    if env_type == 'mujoco':
        env = VecNormalize(env)
    # if env_type == 'atari':
    #     input("Normalizing for ATari game: okay? [Enter]")
    #     #normalize rewards but not observations for atari
    #     env = VecNormalizeRewards(env)
    return env
def build_env(args, seed):
    """Build training and evaluation environments plus a TF session.

    Returns:
        (env, sess, evalenv) — training env, InteractiveSession, eval env.
    """
    n_envs = 1
    alg = args.alg
    # NOTE(review): the `seed` parameter is immediately overwritten with a
    # random draw — confirm this is intended (the original did the same).
    seed = int(np.random.rand(1) * 101000)
    print(seed)
    env_type, env_id = get_env_type(args.env)
    set_global_seeds(seed)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
    else:
        sess = tf.InteractiveSession()
        env = make_vec_env(env_id, env_type, args.numenv, seed,
                           reward_scale=args.reward_scale)
        evalenv = make_vec_env(env_id, env_type, args.numenv, seed,
                               reward_scale=args.reward_scale)
    return env, sess, evalenv
def build_env(args, game_name, method_name, tag):
    """Build the environment for *args*, routing deepq monitor logs into
    ``test_logs_{tag}/{game_name}/{method_name}``.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2  # macOS reports hyperthreads
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)
    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            logger_dir_path = 'test_logs_{}/{}/{}'.format(
                tag, game_name, method_name)
            # ROBUSTNESS FIX: exist_ok avoids the check-then-create race of
            # the original os.path.exists + os.makedirs pair.
            os.makedirs(logger_dir_path, exist_ok=True)
            env = make_env(env_id, env_type, seed=seed,
                           logger_dir=logger_dir_path,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Build the environment for *args*, with per-mode reward clipping for
    the deepq atari path (see inline comment)."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    play = args.play
    mode = args.mode
    multiplayer = args.multiplayer
    env_type, env_id = get_env_type(args)
    isSpaceInvaders = "SpaceInvaders" in args.env

    if env_type in {'atari', 'retro'}:
        # this should be the only algorithm I'll use
        if alg == 'deepq':
            # Reward clipping policy (same truth table as the original three
            # branches): unclip when playing so the real score is visible,
            # and for SpaceInvaders multiplayer where clipping is done
            # manually by the caller; clip during ordinary training.
            clip = not (play or (multiplayer and isSpaceInvaders))
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True,
                                           'clip_rewards': clip},
                           env_kwargs={'game_mode': mode})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Build the environment for *args* (standard baselines layout)."""
    cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cores //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cores
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            return make_env(env_id, env_type, seed=seed,
                            wrapper_kwargs={'frame_stack': True})
        if alg == 'trpo_mpi':
            return make_env(env_id, env_type, seed=seed)
        env = make_vec_env(env_id, env_type, n_envs, seed,
                           gamestate=args.gamestate,
                           reward_scale=args.reward_scale)
        return VecFrameStack(env, 4)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Build the environment for *args*, forwarding all remaining CLI
    arguments (minus ``seed``) to the env constructor as kwargs."""
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
        return env

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    # vars(args) returns the namespace's live __dict__, so this delete also
    # removes `seed` from args itself — done so that seed won't overwrite
    # the method given by gym.Env class.
    args_dict = vars(args)
    del args_dict["seed"]
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations,
                       env_kwargs=args_dict)
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Build the environment for *args*, with progress prints for debugging
    slow startup on clusters."""
    cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cores //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cores  # TODO: removed or ncpu
    print("ncpu = {}".format(cores))
    print("Nenv = {}".format(n_envs))
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
        return env

    print("TF config starting...")
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    print("Get session ...")
    get_session(config=config)
    flatten_dict_observations = alg not in {'her'}
    print("Starting make_vec_env")
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    if env_type == 'mujoco':
        env = VecNormalize(env, use_tf=True)
    return env
def build_env(args):
    """Build the environment for *args*, install colored DEBUG logging, and
    apply per-game reward-shaping wrappers for selected atari titles."""
    root_logger = logging.getLogger()
    coloredlogs.install(
        level='DEBUG',
        fmt='%(asctime)s,%(msecs)03d %(filename)s[%(process)d] %(levelname)s %(message)s')
    root_logger.setLevel(logging.DEBUG)

    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cpu_count
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            env = make_vec_env(env_id, env_type, n_envs, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, 4)
    else:
        # TODO: Ensure willuse GPU when sent to SLURM (Add as a command-line argument)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)
        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    # Hand-tuned reward shaping for specific games.
    if env_id == "MsPacmanNoFrameskip-v4":
        env = super_simple_dqn_wrapper.PacmanClearTheBoardRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    elif env_id == "FreewayNoFrameskip-v4":
        env = super_simple_dqn_wrapper.AltFreewayRewardsWrapper(env)
        env = super_simple_dqn_wrapper.FreewayUpRewarded(env)
        env.ale.setDifficulty(1)
    elif env_id == "JamesbondNoFrameskip-v4":
        env = super_simple_dqn_wrapper.FearDeathWrapper(env)
    return env
def build_env(args):
    """Build a vector of n multi-agent environments."""
    cores = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cores //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cores
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args.env)

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    flatten_dict_observations = alg not in {'her', 'maddpg'}
    return make_vec_env(env_id, env_type, n_envs, seed,
                        reward_scale=args.reward_scale,
                        flatten_dict_observations=flatten_dict_observations,
                        isMultiAgent=True)
def main():
    """Train a deepq MLP agent on CartPole-v1 and save the learned policy."""
    n_envs = 1
    env_name = "CartPole-v1"
    env_kind = "classic_control"
    rng_seed = None
    env = make_vec_env(env_name, env_kind, n_envs, rng_seed,
                       wrapper_kwargs=None,
                       start_index=0,
                       reward_scale=1.0,
                       flatten_dict_observations=True,
                       gamestate=None)
    act = deepq.learn(env,
                      network='mlp',
                      lr=1e-3,
                      total_timesteps=100000,
                      buffer_size=50000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.02,
                      print_freq=10,
                      callback=callback)
    print("Saving model to cartpole_model.pkl")
    act.save("cartpole_model.pkl")
def build_env(args):
    """Build the MujocoQuadForce-v1 environment used by this script."""
    alg = args.alg
    seed = args.seed

    # tf config
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    flatten_dict_observations = alg not in {'her'}
    env = make_vec_env('MujocoQuadForce-v1', 'mujoco', args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    # env = ActionClipWrapper(env)
    # if env_type == 'mujoco':
    #     env = VecNormalize(env)
    return env
def main():
    """Run a trained TRPO policy in the pickbot Gazebo simulation forever,
    printing the accumulated reward at the end of every episode."""
    # unpause Simulation so that robot receives data on all topics
    gazebo_connection.GazeboConnection().unpauseSim()
    # create node
    rospy.init_node('pickbot_gym', anonymous=True, log_level=rospy.FATAL)

    # NOTE(review): wrapper_kwargs is passed the Monitor class rather than a
    # dict — confirm make_vec_env accepts this (looks unusual).
    env = make_vec_env(env_id, env_type, num_env, seed,
                       wrapper_kwargs=Monitor,
                       start_index=0,
                       reward_scale=1.0,
                       flatten_dict_observations=True,
                       gamestate=None)
    # total_timesteps=0 means: no training, just load the saved policy.
    act = trpo_mpi.learn(env=env, network='mlp', total_timesteps=0,
                         load_path=modelsdir + "model")

    obs, done = env.reset(), False
    episode_rew = 0
    while True:
        obs, rew, done, _ = env.step(act.step(obs)[0])
        episode_rew += rew[0] if isinstance(env, VecEnv) else rew
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            print('episode_rew={}'.format(episode_rew))
            episode_rew = 0
            obs = env.reset()
def setup_eval_env(self, env_name, seed):
    """Create a single frame-stacked atari evaluation env for *env_name*.

    Short names (e.g. 'spaceinvaders') are mapped to their NoFrameskip-v4
    ids; anything else is capitalized and suffixed the same way. Rewards
    are left unclipped and episode-life is disabled for true evaluation.
    """
    short_names = {
        "spaceinvaders": "SpaceInvadersNoFrameskip-v4",
        "mspacman": "MsPacmanNoFrameskip-v4",
        "videopinball": "VideoPinballNoFrameskip-v4",
        "beamrider": "BeamRiderNoFrameskip-v4",
    }
    env_id = short_names.get(env_name)
    if env_id is None:
        env_id = env_name[0].upper() + env_name[1:] + "NoFrameskip-v4"
    env_type = "atari"
    # env id, env type, num envs, and seed
    env = make_vec_env(env_id, env_type, 1, seed,
                       wrapper_kwargs={
                           'clip_rewards': False,
                           'episode_life': False,
                       })
    if env_type == 'atari':
        env = VecFrameStack(env, 4)
    print("env actions", env.action_space)
    return env
def make_envs(
        env_name, num_env, seed, max_eplen, frame_stack_size=4,
        noop_reset=True, fire_reset=True, eval_dir: Path = None,
        use_logger=True, video_recorder=False):
    """Create frame-stacked atari evaluation envs, optionally wrapped with a
    video recorder writing one clip per episode under ``eval_dir/videos``."""
    envs = make_vec_env(
        env_name,
        'atari',
        num_env=num_env,
        seed=seed,
        max_episode_steps=max_eplen,
        noop_reset=noop_reset,
        use_logger=use_logger,
        wrapper_kwargs={'fire_reset': fire_reset},
    )
    envs = VecFrameStack(envs, frame_stack_size)
    if not video_recorder:
        return envs
    # Record every reset (trigger always fires), one episode per clip.
    return VecVideoRecorder(
        envs,
        str(eval_dir / 'videos'),
        record_video_trigger=lambda _: True,
        video_length=max_eplen,
    )
def build_pend_env(args, **kwargs):
    """Build a vectorized classic-control (pendulum) env from *args*;
    extra ``kwargs`` are accepted for call-site compatibility and ignored."""
    n_envs = args.num_env or 1
    return make_vec_env(args.env, 'classic_control', n_envs, args.seed,
                        reward_scale=args.reward_scale,
                        flatten_dict_observations=True)
def build_env(args):
    """Build a vectorized environment for *args* (ROS/mujoco variant)."""
    cores = multiprocessing.cpu_count()
    if sys.platform == "darwin":
        cores //= 2  # macOS reports hyperthreads
    n_envs = args.num_env or cores
    alg = args.alg
    seed = args.seed
    env_type, env_id = get_env_type(args)

    # Setup the Tensorflow session
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)

    flatten_dict_observations = alg not in {'her'}
    # Make the environment here ~~~
    env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                       reward_scale=args.reward_scale,
                       flatten_dict_observations=flatten_dict_observations)
    print(" ------------- env made ----------------------")
    if env_type == 'mujoco':
        rospy.logdebug('env_type == Mujoco')
        env = VecNormalize(env, use_tf=True)
    return env
def main():
    """Train TRPO on the configured game, logging to an output directory
    derived from the config filename, and optionally save the model."""
    args = parse_args()

    # Log sinks: always text/csv/stdout, plus tensorboard on request.
    format_strs = ['log', 'csv', 'stdout']
    if args.tensorboard:
        format_strs.append('tensorboard')

    config = parse_config(args.config)
    outdir = os.path.join(
        args.outdir, os.path.splitext(os.path.basename(args.config))[0])
    logger.configure(dir=outdir, format_strs=format_strs)

    env_type, env_id = get_env_type(GAME_ENVIRONMENT)
    env = make_vec_env(env_id, env_type, 1, args.seed)
    model = trpo_mpi.learn(env=env,
                           network=NETWORK_ARCHITECTURE,
                           total_timesteps=args.total_timesteps,
                           **config)
    env.close()
    if args.save:
        model.save(os.path.join(outdir, 'model'))