from ddpg import Agent
from OUNoise import Noise
import matplotlib.pyplot as plt

from pid_env import PidEnv  # assumed module for the PID environment; adjust to the actual location

env = PidEnv(setpoint=20)
batch_size = 128
rewards = []
agent = Agent(num_states=5, num_actions=3)
noise = Noise(num_actions=3)

for episode in range(30):
    state = env.reset()
    noise.reset()
    eps_reward = 0
    for step in range(500):
        action = agent.get_action(state)
        action = noise.get_action(action, step)  # add exploration noise to the policy output
        new_state, reward = env.step(action)
        agent.mem.push(state, action, reward, new_state)
        agent.learn(batch_size)
        state = new_state
        eps_reward += reward
    rewards.append(eps_reward)

plt.plot(rewards)
plt.show()
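The training loop assumes OUNoise exposes a Noise class with reset() and get_action(action, step). A minimal sketch of such a class, assuming Ornstein-Uhlenbeck dynamics with a noise scale annealed from max_sigma to min_sigma over the 500-step episode (the hyperparameter values are illustrative defaults, not values from this project):

import numpy as np

class Noise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""
    def __init__(self, num_actions, mu=0.0, theta=0.15,
                 max_sigma=0.3, min_sigma=0.05, decay_period=500):
        self.num_actions = num_actions
        self.mu = mu          # long-run mean the process reverts to
        self.theta = theta    # mean-reversion rate
        self.sigma = max_sigma
        self.max_sigma = max_sigma
        self.min_sigma = min_sigma
        self.decay_period = decay_period
        self.reset()

    def reset(self):
        # Restart the process at its mean at the start of each episode.
        self.state = np.ones(self.num_actions) * self.mu

    def get_action(self, action, step):
        # Mean-reverting random walk: drift toward mu plus Gaussian kicks.
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.randn(self.num_actions)
        self.state = self.state + dx
        # Anneal sigma from max_sigma to min_sigma over decay_period steps.
        self.sigma = self.max_sigma - (self.max_sigma - self.min_sigma) \
                     * min(1.0, step / self.decay_period)
        return action + self.state

Temporally correlated OU noise is the usual exploration choice for DDPG: consecutive perturbations push the continuous action in a consistent direction rather than jittering independently around the policy output.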
# Fragment of the meta-learning loop; setpoints, curr, env, agent, noise,
# total_steps, random, and normalized are defined earlier in the script.
import numpy as np

setpoints.append(curr)
agent.metalearn(setpoints)

for episode in range(1):
    noise.reset()
    eps_reward = 0
    print(episode)
    best_parameters = [(0, 0, 0), 0, 0]
    step = 0
    for i in range(total_steps):
        print(step)
        step += 1
        # Fixed setpoint of 20 unless randomised setpoints were requested.
        setpoint = 20 if not random else np.random.random() * 100
        state = env.reset(setpoint)
        action_before = agent.get_action(state)
        action = noise.get_action(action_before, step)
        # Abort and dump diagnostics if the actor starts emitting NaNs.
        if np.isnan(action).any():
            print(action)
            print(action_before)
            print(state)
            for param in agent.actor.parameters():
                print(param)
            exit_flag = True
            break
        new_state, reward = env.step(action)
        # Discard catastrophic transitions; keep the rest for replay.
        if reward > -1_000_000:
            agent.mem.push(state, action, reward, new_state)
        if reward > -100:
            normalized.append(reward)
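Both scripts push transitions into agent.mem and later sample minibatches of batch_size for learning. A minimal replay buffer with that push interface might look like the following (the sample method and its return layout are assumptions, not confirmed by the source):

import random
from collections import deque

class ReplayBuffer:
    """FIFO experience buffer; oldest transitions are evicted at capacity."""
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))

    def sample(self, batch_size):
        # Uniform random minibatch, unzipped into per-field tuples.
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states = zip(*batch)
        return states, actions, rewards, next_states

    def __len__(self):
        return len(self.buffer)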
from utils import fetch_protein
from protein import ProteinState
from ddpg import Agent, ReplayBuffer

EPISODES = 10000
STEPS = 500

if __name__ == "__main__":
    # Target fold: the Trp-cage miniprotein (PDB ID 2JOF).
    goal_state = fetch_protein("2jof")
    # Two backbone dihedrals (phi, psi) per residue.
    state_dim = goal_state.n_residues() * 2
    action_dim = goal_state.n_residues() * 2
    buffer = ReplayBuffer(10000)
    agent = Agent(state_dim, action_dim, (0, 360))
    for _ in range(EPISODES):
        # Each episode starts from a fresh random conformation.
        state = ProteinState(n_residues=goal_state.n_residues())
        for _ in range(STEPS):
            action = agent.get_action(state)
            next_state = state.do_action(action)
            # Reward is the drop in energy from the current to the next state.
            reward = state.eval_state() - next_state.eval_state()
            buffer.append(state, action, reward, next_state)
            agent.update(buffer)
            print(state.l2_norm(goal_state))
            state = ProteinState(angles=next_state.angles())
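The folding script relies on a ProteinState exposing n_residues(), angles(), do_action(), eval_state(), and l2_norm(). A toy stand-in consistent with those call sites, assuming a conformation is a vector of (phi, psi) dihedrals in degrees and substituting a placeholder energy for the project's real scoring function:

import numpy as np

class ProteinState:
    """Toy stand-in: a conformation as two backbone dihedrals per residue."""
    def __init__(self, n_residues=None, angles=None):
        if angles is not None:
            self._angles = np.asarray(angles, dtype=float)
        else:
            # Start from a random conformation in [0, 360) degrees.
            self._angles = np.random.uniform(0, 360, size=n_residues * 2)

    def n_residues(self):
        return len(self._angles) // 2

    def angles(self):
        return self._angles

    def do_action(self, action):
        # An action perturbs every dihedral; wrap back into [0, 360).
        return ProteinState(angles=(self._angles + action) % 360)

    def eval_state(self):
        # Placeholder energy; the real project would score the conformation
        # with a physics- or knowledge-based potential.
        return float(np.sum(np.cos(np.radians(self._angles))))

    def l2_norm(self, other):
        # Distance to another conformation in raw angle space.
        return float(np.linalg.norm(self._angles - other.angles()))

With eval_state() as an energy, the reward state.eval_state() - next_state.eval_state() is positive exactly when a move lowers the energy, so the agent is paid for descending the energy landscape toward the goal fold.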