Example #1
from ddpg import Agent
from OUNoise import Noise
from pid_env import PidEnv  # assumed import; the example does not show where PidEnv is defined
import matplotlib.pyplot as plt

env = PidEnv(setpoint=20)
batch_size = 128
rewards = []
agent = Agent(num_states=5, num_actions=3)  # 3 actions: presumably the PID gains Kp, Ki, Kd
noise = Noise(num_actions=3)                # Ornstein-Uhlenbeck exploration noise

for episode in range(30):
    state = env.reset()
    noise.reset()
    eps_reward = 0
    for step in range(500):
        action = agent.get_action(state)
        action = noise.get_action(action, step)  # add exploration noise, annealed by step

        new_state, reward = env.step(action)

        agent.mem.push(state, action, reward, new_state)  # store transition in replay memory

        agent.learn(batch_size)  # one update from a sampled minibatch

        state = new_state
        eps_reward += reward
    rewards.append(eps_reward)


plt.plot(rewards)  # episode returns over training
plt.show()
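
The example above relies on a PidEnv class that is not shown. The interface it uses is a reset() that returns the initial state and a step(action) that returns a (next_state, reward) pair; the sketch below is one way to satisfy that interface, where the 5-dimensional state, the toy first-order plant, and the tracking-error reward are all illustrative assumptions, not the original implementation:

import numpy as np

class PidEnv:
    """Illustrative stub of the environment interface used above (not the original).

    The 3-dimensional action is assumed to be the PID gains (Kp, Ki, Kd);
    the 5-dimensional state and the plant dynamics below are assumptions.
    """

    def __init__(self, setpoint=20):
        self.setpoint = setpoint
        self.reset()

    def reset(self, setpoint=None):
        # Example #2 calls reset(setpoint), so the setpoint is optional here.
        if setpoint is not None:
            self.setpoint = setpoint
        self.value = 0.0        # current plant output
        self.integral = 0.0     # accumulated error
        self.prev_error = self.setpoint - self.value
        return self._state()

    def _state(self):
        error = self.setpoint - self.value
        return np.array([self.value, self.setpoint, error,
                         self.integral, self.prev_error], dtype=np.float32)

    def step(self, action):
        kp, ki, kd = action  # assumed: the agent outputs PID gains
        error = self.setpoint - self.value
        self.integral += error
        control = kp * error + ki * self.integral + kd * (error - self.prev_error)
        self.prev_error = error
        self.value += 0.1 * (control - self.value)  # toy first-order plant
        reward = -abs(self.setpoint - self.value)   # penalize tracking error
        return self._state(), reward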
Example #2
        setpoints.append(curr)
    agent.metalearn(setpoints)


    for episode in range(1):
        noise.reset()
        eps_reward = 0
        print(episode)
        best_parameters = [(0, 0, 0), 0, 0]
        for step in range(1, total_steps + 1):  # step index drives the noise schedule
            print(step)
            setpoint = 20 if not random else np.random.random() * 100  # `random` is a boolean flag here
            state = env.reset(setpoint)
            action_before = agent.get_action(state)
            action = noise.get_action(action_before, step)
            if np.isnan(action).any():  # actor diverged: dump debug info and abort
                print(action)
                print(action_before)
                print(state)
                for param in agent.actor.parameters():
                    print(param)
                exit = True  # flag read outside this excerpt (note: shadows the built-in exit)
                break
    
            new_state, reward = env.step(action)
            if reward > -1_000_000:  # discard transitions with extreme penalty rewards
                agent.mem.push(state, action, reward, new_state)
            if reward > -100:
                normalized.append(reward)
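
Both examples draw exploration noise from an OUNoise module whose code is not shown. A common pattern for this kind of class is an Ornstein-Uhlenbeck process whose scale anneals with the step counter; the sketch below follows that pattern, with parameter names and the decay schedule as assumptions rather than the original code:

import numpy as np

class Noise:
    """Ornstein-Uhlenbeck action noise with a step-annealed scale.

    A common pattern behind modules like OUNoise; the parameter names
    and decay schedule here are assumptions, not the original code.
    """

    def __init__(self, num_actions, mu=0.0, theta=0.15,
                 sigma_max=0.3, sigma_min=0.05, decay_steps=10_000):
        self.num_actions = num_actions
        self.mu = mu
        self.theta = theta
        self.sigma = sigma_max
        self.sigma_max = sigma_max
        self.sigma_min = sigma_min
        self.decay_steps = decay_steps
        self.reset()

    def reset(self):
        # restart the OU process at the mean
        self.state = np.ones(self.num_actions) * self.mu

    def get_action(self, action, step=0):
        # anneal sigma from sigma_max down to sigma_min over decay_steps
        frac = min(1.0, step / self.decay_steps)
        self.sigma = self.sigma_max - (self.sigma_max - self.sigma_min) * frac
        # one step of the Ornstein-Uhlenbeck process
        self.state += self.theta * (self.mu - self.state) \
            + self.sigma * np.random.randn(self.num_actions)
        return action + self.state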
Example #3
from utils import fetch_protein
from protein import ProteinState
from ddpg import Agent, ReplayBuffer


EPISODES = 10000
STEPS = 500

if __name__ == "__main__":
    goal_state = fetch_protein("2jof")  # target fold (PDB ID 2JOF)
    state_dim = goal_state.n_residues() * 2   # two angle values per residue
    action_dim = goal_state.n_residues() * 2
    buffer = ReplayBuffer(10000)
    agent = Agent(state_dim, action_dim, (0, 360))  # actions bounded to the 0-360 degree range

    for _ in range(EPISODES):
        # start each episode from a fresh chain with the same residue count as the goal
        state = ProteinState(n_residues=goal_state.n_residues())
        for _ in range(STEPS):
            action = agent.get_action(state)
            next_state = state.do_action(action)
            # reward the decrease in the evaluation score between consecutive states
            reward = state.eval_state() - next_state.eval_state()

            buffer.append(state, action, reward, next_state)

            agent.update(buffer)

            print(state.l2_norm(goal_state))  # distance to the goal fold
            state = ProteinState(angles=next_state.angles())