import gc

import gym

from agent.agent import Agent

MAX_EPISODES = 5

# Environment and agent are sized from the environment's own spaces.
env = gym.make('BipedalWalker-v2')
state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
agent = Agent(state_size, action_size)


def _noisy_action(observation):
    """Return the agent's output for ``observation`` plus its noise term."""
    return agent(observation) + agent.get_noise()


# Warm-up: feed 1024 transitions to ``agent.append`` before the episode loop
# (presumably filling a replay memory -- confirm against Agent).
state = env.reset()
for _ in range(1024):
    action = _noisy_action(state)
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    # Start a fresh episode whenever the current one has finished.
    state = env.reset() if done else next_state

for _ep in range(MAX_EPISODES):
    state = env.reset()
    count = 0
    while True:
        count += 1
        # Rendering is disabled; call env.render() here to watch the episode.
        action = _noisy_action(state)
        next_state, reward, done, info = env.step(action)
        # NOTE(review): no exit condition is visible for this loop in the
        # current view; the remainder of the body appears to be cut off.
MAX_EPISODES = 1000

# Pendulum-v0 is the active environment; 'BipedalWalker-v2' was the
# previously used alternative.
env = gym.make("Pendulum-v0")

# Show the action and observation bounds, one per line.
print(
    env.action_space.high,
    env.action_space.low,
    env.observation_space.high,
    env.observation_space.low,
    sep="\n",
)

state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
agent = Agent(state_size, action_size, ACCESS_SIZE)


def _exploration_action(observation):
    """Return the doubled agent output plus noise, clipped to [-2, 2].

    NOTE(review): the factor 2 and the [-2, 2] bounds presumably match the
    Pendulum action range printed above -- confirm.
    """
    return np.clip(2 * agent(observation) + agent.get_noise(), -2, 2)


# Warm-up: feed ACCESS_SIZE transitions to ``agent.append``
# (presumably filling a replay memory -- confirm against Agent).
state = env.reset()
for _ in range(ACCESS_SIZE):
    action = _exploration_action(state)
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    # Start a fresh episode whenever the current one has finished.
    state = env.reset() if done else next_state


def to_np(scale):
    """Wrap a single value in a one-element NumPy array."""
    return np.array([scale])


# Two line plots, each seeded with the single point (0, 0).
viz_reward = viz.line(X=to_np(0), Y=to_np(0))
# NOTE(review): the pause presumably lets the plotting backend register the
# first window before the second is created -- confirm.
time.sleep(1)
viz_length = viz.line(X=to_np(0), Y=to_np(0))
MAX_EPISODES = 1000

env = gym.make('BipedalWalker-v2')

# Show the action and observation bounds, one per line.
print(
    env.action_space.high,
    env.action_space.low,
    env.observation_space.high,
    env.observation_space.low,
    sep="\n",
)

state_size = env.observation_space.shape[0]
action_size = env.action_space.shape[0]
agent = Agent(state_size, action_size, ACCESS_SIZE)
# NOTE(review): presumably loads previously saved models tagged 1000 --
# confirm against Agent.restore_models.
agent.restore_models(1000)


def _exploration_action(observation):
    """Return the agent output plus noise, clipped to [-1, 1]."""
    return np.clip(agent(observation) + agent.get_noise(), -1, 1)


# Warm-up: feed ACCESS_SIZE transitions to ``agent.append``
# (presumably filling a replay memory -- confirm against Agent).
state = env.reset()
for _ in range(ACCESS_SIZE):
    action = _exploration_action(state)
    next_state, reward, done, info = env.step(action)
    agent.append(state, action, reward, done, next_state)
    # Start a fresh episode whenever the current one has finished.
    state = env.reset() if done else next_state


def to_np(scale):
    """Wrap a single value in a one-element NumPy array."""
    return np.array([scale])


# Two line plots, each seeded with the single point (0, 0).
viz_reward = viz.line(X=to_np(0), Y=to_np(0))
# NOTE(review): the pause presumably lets the plotting backend register the
# first window before the second is created -- confirm.
time.sleep(1)
viz_length = viz.line(X=to_np(0), Y=to_np(0))