Esempio n. 1
0
def main(folderName):
    """
    Restore a saved policy and run it for 20 episodes in a new environment.

    Args:
        folderName: location passed to ``policy.restore()``
            (e.g. './log-files/SecondModel/Jan-30_20_38_36')
    """
    # To inspect the stored tensors while debugging:
    # chkp.print_tensors_in_checkpoint_file("{}/value_function.ckpt".format(folderName), tensor_name='', all_tensors=True)

    environment = myEnv()

    # One extra observation dimension for the time step feature (see run_episode()).
    n_obs = len(environment.observation_space()) + 1
    n_act = len(environment.action_space())

    # NOTE(review): the meaning of 0.003 is defined by Policy, which is not
    # visible here -- confirm before changing it.
    restored_policy = Policy(n_obs, n_act, 0.003)
    restored_policy.restore(folderName)

    # The value returned by run_policy() is not used by this function.
    run_policy(environment, restored_policy, episodes=20)
Esempio n. 2
0
def init_env(env_name):
	"""
	Initialize the environment and report the sizes of its spaces.

	Args:
		env_name: str environment name (e.g. "SecondModel"); not read by
			this function, a ``myEnv`` instance is always created

	Returns: 3-tuple
		environment (object)
		length of ``observation_space()`` (int)
		length of ``action_space()`` (int)
	"""
	environment = myEnv()

	# Tuple items are evaluated left to right, so the observation space is
	# queried before the action space.
	return (
		environment,
		len(environment.observation_space()),
		len(environment.action_space()),
	)
Esempio n. 3
0
def init_gym(env_name):
    """
    Create the environment and return it with its space dimensions.

    Args:
        env_name: str environment name (e.g. "Humanoid-v1"); currently
            unused because ``myEnv()`` is created in place of
            ``gym.make(env_name)``

    Returns: 3-tuple
        environment (object)
        number of observation dimensions (int)
        number of action dimensions (int)
    """
    environment = myEnv()

    observation_space = environment.observation_space()
    n_observations = len(observation_space)

    action_space = environment.action_space()
    n_actions = len(action_space)

    return environment, n_observations, n_actions
Esempio n. 4
0
def main(folderName, bestMode):
	"""
	Replay saved results from ``folderName`` in a new environment.

	Args:
		folderName: location passed to ``recoverSequences()`` or
			``policy.restore()`` (e.g. './log-files/SecondModel/Jan-30_20_38_36')
		bestMode: when truthy, replay the action sequences recovered from
			``folderName``; otherwise restore the policy and run it for
			20 episodes
	"""
	# To inspect the stored tensors while debugging:
	# chkp.print_tensors_in_checkpoint_file("{}/value_function.ckpt".format(folderName), tensor_name='', all_tensors=True)

	environment = myEnv()

	if bestMode:
		# Replay each recovered sequence's actions in the same environment.
		for recorded in recoverSequences(folderName):
			runSeq(recorded['actions'], environment)
		return

	# One extra observation dimension for the time step feature (see run_episode()).
	n_obs = len(environment.observation_space()) + 1
	n_act = len(environment.action_space())
	obs_scaler = Scaler(n_obs)

	# NOTE(review): the meaning of 0.003 is defined by Policy, which is not
	# visible here -- confirm before changing it.
	restored_policy = Policy(n_obs, n_act, 0.003)
	restored_policy.restore(folderName)

	# The value returned by run_policy() is not used by this function.
	run_policy(environment, restored_policy, obs_scaler, episodes=20)