コード例 #1
0
"""environment"""
env = gym.make(args.env_name)

# Leading dimension of the observation and action shapes.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]

# An action space whose shape tuple is empty is flagged as discrete.
is_disc_action = len(env.action_space.shape) == 0

# Optional running filter over states (tracks running mean/std per the
# original note, constructed with clip=5); None means no filter is used.
running_state = (
    ZFilter((state_dim,), clip=5) if args.use_running_state else None
)

"""seeding"""
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

# Build the networks only after seeding, then move both to the target device.
policy_net = Policy(
    state_dim,
    action_dim,
    log_std=args.log_std,
)
value_net = Value(state_dim)
policy_net.to(device)
value_net.to(device)

# Sampling agent for TRPO; the thread count is fixed to one here.
agent_trpo = Agent(
    env,
    policy_net,
    device,
    running_state=running_state,
    render=args.render,
    num_threads=1,
)

def update_params_trpo(batch):
    """Start a parameter update from a collected batch.

    NOTE(review): only the opening of this function is visible in this
    excerpt; whatever follows the value-network evaluation is not shown,
    so the return value and later steps are not documented here.

    Args:
        batch: Object exposing ``state``, ``action``, ``reward`` and
            ``mask`` attributes, each of which is stacked with
            ``np.stack`` below.
    """
    # (3)
    # Stack each field into one NumPy array, wrap it as a tensor, cast it to
    # args.dtype and move it to `device`.
    states = torch.from_numpy(np.stack(batch.state)).to(args.dtype).to(device)
    actions = torch.from_numpy(np.stack(batch.action)).to(args.dtype).to(device)
    rewards = torch.from_numpy(np.stack(batch.reward)).to(args.dtype).to(device)
    masks = torch.from_numpy(np.stack(batch.mask)).to(args.dtype).to(device)
    # Gradient tracking is switched off for this forward pass.
    with torch.no_grad():
        values = value_net(states)  # estimate value function of each state with NN
コード例 #2
0
# Leading dimension of the observation shape.
state_dim = env.observation_space.shape[0]
# An action space whose shape tuple is empty is handled as discrete below
# (its size is then read from env.action_space.n).
is_disc_action = len(env.action_space.shape) == 0
# State/reward filters are disabled in this script; the commented lines show
# how they would be constructed. A loaded checkpoint may still supply its own
# running_state (see the else-branch below).
# running_state = ZFilter((state_dim,), clip=5)
running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)

# Seeding
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)

# Define actor and critic: build fresh networks, or restore them (together
# with the saved running_state) from a checkpoint file.
if args.model_path is None:
    if is_disc_action:
        policy_net = DiscretePolicy(state_dim, env.action_space.n)
    else:
        policy_net = Policy(state_dim,
                            env.action_space.shape[0],
                            log_std=args.log_std)
    value_net = Value(state_dim)
else:
    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # only load checkpoints that come from a trusted source.
    # The context manager closes the file handle once loading finishes,
    # including when unpickling raises.
    with open(args.model_path, "rb") as model_file:
        policy_net, value_net, running_state = pickle.load(model_file)
policy_net.to(device)
value_net.to(device)

# Create agent
agent = Agent(env,
              policy_net,
              device,
              running_state=running_state,
              render=args.render,
              num_threads=args.num_threads)