def initEnviroment(self):
    print('Initialize env')
    # update to your env
    env = EnvironmentWrapper()
    env.initEnviroment(UNITY_ENVIROMENT)
    # get the default brain
    print('observation space:', env.state_dim)
    print('action space:', env.action_dim)
    print('Env init done')
    self.config.model_name = MODEL
    self.config.env = env
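# For context: a hypothetical sketch of the wrapper interface initEnviroment()
# relies on, assuming the legacy unityagents (ML-Agents 0.x) API. Only
# initEnviroment, state_dim and action_dim appear in the code above; the
# UnityEnvironment and brain handling below are assumptions, not the repo's code.
from unityagents import UnityEnvironment

class EnvironmentWrapper:
    def initEnviroment(self, file_name):
        self.env = UnityEnvironment(file_name=file_name)
        # the first registered brain is the default one
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.state_dim = brain.vector_observation_space_size
        self.action_dim = brain.vector_action_space_size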
def func():
    env = gym.make(gym_id)
    env = TimeLimit(env, max_episode_steps=args.max_episode_len)
    env = EnvironmentWrapper(env.env,
                             normOb=normOb,
                             rewardNormalization=rewardNormalization,
                             clipOb=clipOb,
                             clipRew=clipRew,
                             **kwargs)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    return env
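# Because func() builds and seeds a fresh env on every call, two envs
# constructed from it should behave identically. A minimal sanity check,
# assuming the old gym API (pre-0.26, where reset() returns only the
# observation); this check is illustrative, not part of the repo:
import numpy as np

e1, e2 = func(), func()
assert np.allclose(e1.reset(), e2.reset())
assert np.allclose(e1.action_space.sample(), e2.action_space.sample())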
rewardNormalization = None
normOb = False
if args.plus:
    if args.plus_returns:
        rewardNormalization = "returns"
if args.plus_plus:
    if args.plus_plus_observation_normalization:
        normOb = True
    if args.plus_plus_observation_clipping > 0:
        clipOb = args.plus_plus_observation_clipping
    if args.plus_plus_reward_clipping > 0:
        clipRew = args.plus_plus_reward_clipping

env = EnvironmentWrapper(env.env,
                         normOb=normOb,
                         rewardNormalization=rewardNormalization,
                         clipOb=clipOb,
                         clipRew=clipRew,
                         gamma=args.gamma)

np.random.seed(args.seed)
env.seed(args.seed)
env.action_space.seed(args.seed)
env.observation_space.seed(args.seed)
tf.set_random_seed(args.seed)

discreteActionsSpace = utils.is_discrete(env)
inputLength = env.observation_space.shape[0]
outputLength = env.action_space.n if discreteActionsSpace else env.action_space.shape[0]
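# utils.is_discrete is defined elsewhere in the repo. Given how its result
# is used (Discrete spaces expose .n, Box spaces expose .shape), a minimal
# implementation would plausibly be the following sketch:
import gym

def is_discrete(env):
    # Discrete action spaces are enumerable; everything else is treated
    # here as a continuous Box
    return isinstance(env.action_space, gym.spaces.Discrete)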
                    help='gradient l2 norm, negative value to turn it off')
args = parser.parse_args()

dtype = tf.float32
dtypeNp = np.float32

if not args.seed:
    args.seed = int(time.time())

graph = tf.Graph()
with tf.Session(graph=graph) as sess:
    env = gym.make(args.gym_id)
    if not args.minimal:
        env = EnvironmentWrapper(env.env,
                                 normOb=True,
                                 rewardNormalization="returns",
                                 clipOb=10.,
                                 clipRew=10.,
                                 episodicMeanVarObs=False,
                                 episodicMeanVarRew=False,
                                 gamma=args.gamma)
    else:
        env = EnvironmentWrapper(env.env,
                                 normOb=False,
                                 rewardNormalization=None,
                                 clipOb=1000000.,
                                 clipRew=1000000)

    np.random.seed(args.seed)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    tf.set_random_seed(args.seed)

    discreteActionsSpace = utils.is_discrete(env)
    inputLength = env.observation_space.shape[0]
    outputLength = env.action_space.n if discreteActionsSpace else env.action_space.shape[0]

    # summaries placeholders and summary scalar objects
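# EnvironmentWrapper itself is defined elsewhere in the repo; the sketch
# below shows what its arguments plausibly control, assuming a running
# mean/variance filter as in common PPO implementations. All names here
# are illustrative, not the repo's actual internals.
import numpy as np

class RunningMeanStd:
    # incremental mean/variance estimate over everything seen so far
    def __init__(self, shape=()):
        self.mean = np.zeros(shape, np.float64)
        self.var = np.ones(shape, np.float64)
        self.count = 1e-4

    def update(self, x):
        delta = x - self.mean
        self.count += 1.0
        self.mean = self.mean + delta / self.count
        self.var = self.var + (delta * (x - self.mean) - self.var) / self.count

def normalize_obs(obs, rms, clipOb=10.):
    # normOb=True: center, scale, and clip observations
    rms.update(obs)
    return np.clip((obs - rms.mean) / np.sqrt(rms.var + 1e-8), -clipOb, clipOb)

def normalize_rew(rew, running_return, rms, gamma, clipRew=10.):
    # rewardNormalization="returns": scale (but do not center) rewards by
    # the std of a running discounted return
    running_return = running_return * gamma + rew
    rms.update(running_return)
    return np.clip(rew / np.sqrt(rms.var + 1e-8), -clipRew, clipRew), running_return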
args = parser.parse_args()

if not args.seed:
    args.seed = int(time.time())

if args.buffer_size == -1:
    args.buffer_size = args.total_train_steps
    print("\nBuffer size not specified; defaulting to {} (same as total_train_steps), as suggested by the paper\n"
          .format(args.buffer_size))

graph = tf.Graph()
with tf.Session(graph=graph) as sess:
    env = gym.make(args.gym_id)
    env = EnvironmentWrapper(env.env, args.norm_obs, args.norm_rew,
                             args.clip_obs, args.clip_rew)

    np.random.seed(args.seed)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    tf.set_random_seed(args.seed)

    if utils.is_discrete(env):
        exit("TD3 can only be applied to continuous action space environments")

    inputLength = env.observation_space.shape[0]
    outputLength = env.action_space.shape[0]

    # summaries placeholders and summary scalar objects
    epRewPh = tf.placeholder(tf.float32, shape=None,
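# The truncated placeholder above begins the standard TF1 idiom for logging
# Python-side scalars to TensorBoard. A hedged completion of that pattern
# (every name except epRewPh is illustrative):
import tensorflow as tf

epRewPh = tf.placeholder(tf.float32, shape=None, name='episode_reward')
epRewSummary = tf.summary.scalar('episode_reward', epRewPh)
writer = tf.summary.FileWriter('logs', sess.graph)

# inside the training loop, evaluate the summary op with the freshly
# computed episode reward and write it at the current step:
# summary = sess.run(epRewSummary, feed_dict={epRewPh: episodeReward})
# writer.add_summary(summary, globalStep)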