Example #1
 def initEnviroment(self):
     print('Initialize env')
     # update UNITY_ENVIROMENT to point at your local environment
     env = EnvironmentWrapper()
     env.initEnviroment(UNITY_ENVIROMENT)
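     # the wrapper exposes the observation/action dimensions once the env is loaded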
     print('observation space:', env.state_dim)
     print('action space:', env.action_dim)
     # get the default brain
     print('Env init done')
     self.config.model_name = MODEL
     self.config.env = env
Example #2
 def func():
     env = gym.make(gym_id)
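     # cap episode length before applying the normalisation wrapper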
     env = TimeLimit(env, max_episode_steps=args.max_episode_len)
     env = EnvironmentWrapper(env.env, normOb=normOb, rewardNormalization=rewardNormalization,
                              clipOb=clipOb, clipRew=clipRew, **kwargs)
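     # seed the env and both spaces so sampled actions/observations are reproducible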
     env.seed(args.seed)
     env.action_space.seed(args.seed)
     env.observation_space.seed(args.seed)  
     return env
Example #3
    rewardNormalization = None
    normOb = False
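    # command-line switches turn on return-based reward normalisation and
    # observation normalisation / clipping before the env is wrapped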
    if args.plus:
        if args.plus_returns:
            rewardNormalization = "returns"
    if args.plus_plus:
        if args.plus_plus_observation_normalization:
            normOb = True
        if args.plus_plus_observation_clipping > 0:
            clipOb = args.plus_plus_observation_clipping
        if args.plus_plus_reward_clipping > 0:
            clipRew = args.plus_plus_reward_clipping

    env = EnvironmentWrapper(env.env,
                             normOb=normOb,
                             rewardNormalization=rewardNormalization,
                             clipOb=clipOb,
                             clipRew=clipRew,
                             gamma=args.gamma)

    np.random.seed(args.seed)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    tf.set_random_seed(args.seed)

    discreteActionsSpace = utils.is_discrete(env)

    inputLength = env.observation_space.shape[0]
    outputLength = env.action_space.n if discreteActionsSpace else env.action_space.shape[0]
Example #4
                   help='gradient l2 norm, negative value to turn it off')

args = parser.parse_args()

dtype = tf.float32
dtypeNp = np.float32

if not args.seed:
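    # fall back to a time-based seed when none is supplied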
    args.seed = int(time.time())

graph = tf.Graph()
with tf.Session(graph=graph) as sess:
    
    env = gym.make(args.gym_id)
    if not args.minimal:
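        # default run: normalise observations and returns, clip both at 10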
        env = EnvironmentWrapper(env.env, normOb=True, rewardNormalization="returns",
                                 clipOb=10., clipRew=10., episodicMeanVarObs=False,
                                 episodicMeanVarRew=False, gamma=args.gamma)
    else:
        env = EnvironmentWrapper(env.env, normOb=False, rewardNormalization=None,
                                 clipOb=1000000., clipRew=1000000)
        
    np.random.seed(args.seed)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)    
    tf.set_random_seed(args.seed)

    discreteActionsSpace = utils.is_discrete(env)
    
    inputLength = env.observation_space.shape[0]
    outputLength = env.action_space.n if discreteActionsSpace else env.action_space.shape[0]
    
    # summary placeholders and summary scalar objects
Example #5
args = parser.parse_args()

if not args.seed:
    args.seed = int(time.time())

if args.buffer_size == -1:
    args.buffer_size = args.total_train_steps
    print(
        "\nBuffer size not specified. Taking value of {} which is the same as total_train_steps, as suggested by the paper\n"
        .format(args.buffer_size))

graph = tf.Graph()
with tf.Session(graph=graph) as sess:

    env = gym.make(args.gym_id)
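    # normalisation and clipping settings come straight from the CLI flags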
    env = EnvironmentWrapper(env.env, args.norm_obs, args.norm_rew,
                             args.clip_obs, args.clip_rew)
    np.random.seed(args.seed)
    env.seed(args.seed)
    env.action_space.seed(args.seed)
    env.observation_space.seed(args.seed)
    tf.set_random_seed(args.seed)

    if utils.is_discrete(env):
        exit("TD3 can only be applied to continuous action space environments")

    inputLength = env.observation_space.shape[0]
    outputLength = env.action_space.shape[0]

    # summary placeholders and summary scalar objects
    epRewPh = tf.placeholder(tf.float32,
                             shape=None,