# Code example #1
# 0
import gc
import gym
from agent.agent import Agent


# Number of episodes the main loop below iterates over.
MAX_EPISODES = 5


# Create the environment and take the first dimension of the observation
# and action spaces as the sizes handed to the agent.
env = gym.make('BipedalWalker-v2')
state_size, action_size = (
    env.observation_space.shape[0],
    env.action_space.shape[0],
)

agent = Agent(state_size, action_size)

# Warm-up phase: take 1024 noisy steps and pass every transition to
# agent.append() (presumably pre-filling the agent's memory — confirm
# against the Agent implementation).
state = env.reset()
for _ in range(1024):
    action = agent(state) + agent.get_noise()
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    # Continue from the new state, or from a fresh episode once this one ends.
    state = env.reset() if done else next_state

# Main loop: run MAX_EPISODES episodes, resetting the environment at the
# start of each one.
for _ep in range(MAX_EPISODES):
    state = env.reset()
    # Number of steps taken so far in the current episode.
    count = 0
    # NOTE(review): no exit from this loop is visible in this excerpt; the
    # body appears to be cut off right after env.step().
    while True:
        count += 1
        # env.render()
        # Action is the agent's output for the state plus agent.get_noise().
        action = agent(state) + agent.get_noise()
        next_state, reward, done, info = env.step(action)
# Code example #2
# 0
# Upper bound on the number of episodes (used outside this excerpt).
MAX_EPISODES = 1000

# Alternative environment kept for reference: gym.make('BipedalWalker-v2')
env = gym.make("Pendulum-v0")

# Print the bounds of the action space, then of the observation space.
for _space in (env.action_space, env.observation_space):
    print(_space.high)
    print(_space.low)

state_size, action_size = (
    env.observation_space.shape[0],
    env.action_space.shape[0],
)
agent = Agent(state_size, action_size, ACCESS_SIZE)

# Warm-up phase: take ACCESS_SIZE noisy steps and pass every transition to
# agent.append() (presumably pre-filling the agent's memory — confirm
# against the Agent implementation).
state = env.reset()
for _ in range(ACCESS_SIZE):
    # Double the agent's output, add noise, then clamp to [-2, 2].
    noisy_action = 2 * agent(state) + agent.get_noise()
    action = np.clip(noisy_action, -2, 2)
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    # Continue from the new state, or from a fresh episode once this one ends.
    state = env.reset() if done else next_state


def to_np(scale):
    """Return ``scale`` wrapped in a one-element list as a NumPy array.

    A scalar argument therefore yields an array of shape ``(1,)``.
    """
    wrapped = [scale]
    return np.array(wrapped)


# Create two line plots, each seeded with the single point (0, 0), and keep
# the values returned by viz.line() in viz_reward and viz_length.
# NOTE(review): `viz` is defined outside this excerpt — presumably a Visdom
# client; confirm.
viz_reward = viz.line(X=to_np(0), Y=to_np(0))
# NOTE(review): the one-second pause presumably lets the plotting backend
# register the first plot before the second is requested — confirm it is needed.
time.sleep(1)
viz_length = viz.line(X=to_np(0), Y=to_np(0))
# Code example #3
# 0
# Upper bound on the number of episodes (used outside this excerpt).
MAX_EPISODES = 1000

env = gym.make('BipedalWalker-v2')

# Print the bounds of the action space, then of the observation space.
for _space in (env.action_space, env.observation_space):
    print(_space.high)
    print(_space.low)

state_size, action_size = (
    env.observation_space.shape[0],
    env.action_space.shape[0],
)
agent = Agent(state_size, action_size, ACCESS_SIZE)
# NOTE(review): presumably reloads models saved under the tag 1000 —
# confirm against Agent.restore_models.
agent.restore_models(1000)

# Warm-up phase: take ACCESS_SIZE noisy steps and pass every transition to
# agent.append() (presumably pre-filling the agent's memory — confirm
# against the Agent implementation).
state = env.reset()
for _ in range(ACCESS_SIZE):
    # Add noise to the agent's output, then clamp to [-1, 1].
    noisy_action = agent(state) + agent.get_noise()
    action = np.clip(noisy_action, -1, 1)
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    # Continue from the new state, or from a fresh episode once this one ends.
    state = env.reset() if done else next_state


def to_np(scale):
    """Return ``scale`` wrapped in a one-element list as a NumPy array.

    A scalar argument therefore yields an array of shape ``(1,)``.
    """
    wrapped = [scale]
    return np.array(wrapped)


# Create two line plots, each seeded with the single point (0, 0), and keep
# the values returned by viz.line() in viz_reward and viz_length.
# NOTE(review): `viz` is defined outside this excerpt — presumably a Visdom
# client; confirm.
viz_reward = viz.line(X=to_np(0), Y=to_np(0))
# NOTE(review): the one-second pause presumably lets the plotting backend
# register the first plot before the second is requested — confirm it is needed.
time.sleep(1)
viz_length = viz.line(X=to_np(0), Y=to_np(0))