Example #1
    def __init__(self,
                 env,
                 lr=0.8,
                 y=0.95,
                 step_cost=.0,
                 living_cost=.0,
                 episode_length=100,
                 memory_capacity=100,
                 batch_size=25,
                 eps=0.5,
                 eps_decay=0.999):
        AbstractAgent.__init__(self, eps, eps_decay)
        self.env = env
        self.lr = lr
        self.y = y
        self.step_cost = step_cost
        self.living_cost = living_cost
        self.s0 = env.field.index('s')
        self.episode_length = episode_length
        self.rewards = []
        self.losses = []
        self.state_len = env.width * env.height

        self.nn = Model(in_features=2,
                        hidden=[self.state_len, self.state_len],
                        out_features=len(Agent.actions))

        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.nn.parameters(), lr=0.01)
        self.memory = ReplayMemory(memory_capacity)
        self.batch_size = batch_size
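
The Model class used above is not shown on this page; a minimal sketch consistent with its constructor arguments (in_features, hidden, out_features) could be a small fully connected PyTorch network like the following (an assumption, not the original implementation):

import torch.nn as nn

class Model(nn.Module):
    # Hypothetical sketch: one Linear + ReLU layer per entry in `hidden`,
    # followed by a Linear layer that outputs one Q-value per action.
    def __init__(self, in_features, hidden, out_features):
        super().__init__()
        layers = []
        prev = in_features
        for size in hidden:
            layers.append(nn.Linear(prev, size))
            layers.append(nn.ReLU())
            prev = size
        layers.append(nn.Linear(prev, out_features))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)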
Example #2
    def run_episode(self):
        AbstractAgent.run_episode(self)
        s = self.s0
        self.rewards.append(.0)
        for j in range(self.episode_length):
            q_predicted = self.predict_q(s)
            a = np.argmax(q_predicted)
            a = self.select_action(a)
            s1, r, over = self.step(s, Agent.actions[a])
            if s != s1:
                r -= self.step_cost
            r -= self.living_cost

            q_target = q_predicted
            q_target[a] = r + self.y * self.predict_q(s1).max()

            history = self.model.fit(x=self._encode_state(s),
                                     y=np.array([q_target]),
                                     epochs=1,
                                     verbose=False)
            self.losses.append(history.history["loss"][-1])

            s = s1
            self.rewards[-1] += r
            if over:
                break
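
The helpers predict_q and _encode_state are not part of this example; assuming states are flat indices on an env.width x env.height grid and the Keras model takes two coordinate features (in_features=2 in example #1), they might look roughly like this (illustrative sketch only, not the original code):

import numpy as np

# Hypothetical methods of the Keras-based agent; the coordinate encoding
# of the flat state index is an assumption.
def _encode_state(self, s):
    # flat state index -> (column, row), shaped (1, 2) for the model input
    return np.array([[s % self.env.width, s // self.env.width]], dtype=np.float32)

def predict_q(self, s):
    # Q-values for all actions in state s, as a 1-D array
    return self.model.predict(self._encode_state(s), verbose=0)[0]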
Example #3
    def __init__(self,
                 env,
                 model,
                 lr=0.8,
                 y=0.95,
                 step_cost=.0,
                 living_cost=.0,
                 episode_length=100,
                 memory_capacity=100,
                 batch_size=10,
                 eps=0.5,
                 eps_decay=0.999):
        AbstractAgent.__init__(self, eps, eps_decay)
        self.env = env
        self.model = model
        self.lr = lr
        self.y = y
        self.step_cost = step_cost
        self.living_cost = living_cost
        self.s0 = env.field.index('s')
        self.episode_length = episode_length
        self.rewards = []
        self.losses = []
        self.memory = ReplayMemory(memory_capacity)
        self.batch_size = batch_size
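
ReplayMemory is also not shown; a minimal ring-buffer sketch with the push()/sample() interface used in these examples might be (assumed structure, not the original class):

import random
from collections import deque

class ReplayMemory:
    # Hypothetical fixed-capacity transition store; oldest entries drop out.
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, s, a, s1, r):
        self.buffer.append((s, a, s1, r))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)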
Example #4
    def run_episode(self):
        AbstractAgent.run_episode(self)
        s = self.s0
        self.rewards.append(.0)
        for j in range(self.episode_length):
            a = np.argmax(self.Q[s, :])
            a = self.select_action(a)
            s1, r, over = self.step(s, AbstractAgent.actions[a])
            if s != s1:
                r -= self.step_cost
            r -= self.living_cost
            self.Q[s, a] = self.Q[s, a] + self.lr * (
                r + self.y * np.max(self.Q[s1, :]) - self.Q[s, a])
            s = s1
            self.rewards[-1] += r
            if over:
                break
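
The inner loop applies the standard tabular Q-learning update Q[s, a] += lr * (r + y * max(Q[s1, :]) - Q[s, a]); a standalone numeric illustration with made-up values:

import numpy as np

# Made-up numbers, only to illustrate the update rule used above.
Q = np.zeros((2, 4))        # 2 states, 4 actions
lr, y = 0.8, 0.95
s, a, s1, r = 0, 1, 1, 1.0  # hypothetical transition that earned reward 1.0
Q[s, a] += lr * (r + y * np.max(Q[s1, :]) - Q[s, a])
print(Q[s, a])              # 0.8, since Q[s1, :] is still all zeros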
Example #5
    def run_episode(self):
        AbstractAgent.run_episode(self)
        s = self.s0
        self.rewards.append(.0)
        for j in range(self.episode_length):
            q_predicted = self._predict_q(s)
            a = torch.argmax(q_predicted, 0).item()
            a = self.select_action(a)
            s1, r, over = self.step(s, Agent.actions[a])
            if s != s1:
                r -= self.step_cost
            r -= self.living_cost
            self.memory.push(s, a, s1, r)
            s = s1
            self.optimize()
            self.rewards[-1] += r
            if over:
                break
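
optimize() is not listed for this agent; assuming _predict_q returns a differentiable tensor of Q-values from self.nn, a single-network replay step compatible with this run_episode could look like this (a sketch under those assumptions, not the original method):

def optimize(self):
    # Hypothetical method of the agent class above: train on a replayed batch
    # once enough transitions have been collected.
    if len(self.memory) < self.batch_size:
        return
    for s, a, s1, r in self.memory.sample(self.batch_size):
        q_predicted = self._predict_q(s)                 # Q(s, .) from self.nn
        q_target = q_predicted.detach().clone()
        q_target[a] = r + self.y * self._predict_q(s1).detach().max()
        loss = self.criterion(q_predicted, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.losses.append(loss.item())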
Example #6
    def __init__(self,
                 env,
                 lr=0.8,
                 y=0.95,
                 step_cost=.0,
                 living_cost=.0,
                 episode_length=100,
                 eps=0.5,
                 eps_decay=0.999):
        AbstractAgent.__init__(self, eps, eps_decay)
        self.env = env
        self.lr = lr
        self.y = y
        self.step_cost = step_cost
        self.living_cost = living_cost
        self.Q = np.zeros((env.width * env.height, len(Agent.actions)))
        self.s0 = env.field.index('s')
        self.episode_length = episode_length
        self.rewards = []
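
The AbstractAgent base class (eps-greedy exploration with decay) is not shown either; a minimal sketch consistent with how eps, eps_decay, select_action and the action indices are used across these examples (assumed, not the original class):

import random

class AbstractAgent:
    # Hypothetical base class; the action order matches the index lists in the
    # stochastic_actions table of example #7 below (0: left, 1: right, 2: up, 3: down).
    actions = ['←', '→', '↑', '↓']

    def __init__(self, eps, eps_decay):
        self.eps = eps
        self.eps_decay = eps_decay

    def run_episode(self):
        self.eps *= self.eps_decay     # decay exploration once per episode

    def select_action(self, greedy_a):
        # explore with probability eps, otherwise keep the greedy action
        if random.random() < self.eps:
            return random.randrange(len(AbstractAgent.actions))
        return greedy_a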
Example #7
    def __init__(self,
                 env,
                 model,
                 lr=0.8,
                 y=0.95,
                 step_cost=.0,
                 living_cost=.0,
                 episode_length=100,
                 eps=0.5,
                 eps_decay=0.999):
        AbstractAgent.__init__(self, eps, eps_decay)
        self.env = env
        self.lr = lr
        self.y = y
        self.step_cost = step_cost
        self.living_cost = living_cost
        self.s0 = env.field.index('s')
        self.episode_length = episode_length
        self.rewards = []
        self.losses = []
        self.model = model

    def run_episode(self):
        AbstractAgent.run_episode(self)
        s = self.s0
        episode_number = len(self.rewards)
        self.rewards.append(.0)
        for j in range(self.episode_length):
            q_predicted = self._predict_q_policy(s)
            a = torch.argmax(q_predicted, 0).item()
            a = self.select_action(a)
            s1, r, over = self.step(s, Agent.actions[a])
            if s != s1:
                r -= self.step_cost
            r -= self.living_cost
            self.memory.push(s, a, s1, r)
            s = s1
            self.optimize()
            self.rewards[-1] += r
            if over:
                break
        if episode_number % self.target_update == 0:
            self.target_nn.load_state_dict(self.nn.state_dict())

    def __init__(self, env, p=1.0, lr=0.8, y=0.95, step_cost=.0, living_cost=.0, episode_length=100,
                 memory_capacity=100, batch_size=10, target_update=10, eps=0.5, eps_decay=0.999):
        AbstractAgent.__init__(self, eps, eps_decay)
        self.env = env
        self.lr = lr
        self.y = y
        self.step_cost = step_cost
        self.living_cost = living_cost
        q = (1.0 - p) / 2
        self.stochastic_actions = {
            '←': [[0, 2, 3], [p, q, q]],
            '→': [[1, 2, 3], [p, q, q]],
            '↑': [[2, 0, 1], [p, q, q]],
            '↓': [[3, 0, 1], [p, q, q]]
        }
        self.s0 = env.field.index('s')
        self.episode_length = episode_length
        self.rewards = []
        self.losses = []
        self.state_len = env.width * env.height
        self.nn = Model(
            in_features=self.state_len,
            hidden=[],
            out_features=len(Agent.actions))
        self.target_nn = Model(
            in_features=self.state_len,
            hidden=[],
            out_features=len(Agent.actions))
        self.target_nn.load_state_dict(self.nn.state_dict())
        self.target_nn.eval()

        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.nn.parameters(), lr=0.05)
        self.memory = ReplayMemory(memory_capacity)
        self.batch_size = batch_size
        self.target_update = target_update
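
For the target-network agent of example #7 (a linear model on a one-hot state, in_features=self.state_len), the missing optimize() could perform a batched update of self.nn against targets computed with the frozen self.target_nn, roughly like this (a sketch, with the one-hot encoding assumed rather than taken from the original code):

import torch

def optimize(self):
    # Hypothetical method of the target-network agent above.
    if len(self.memory) < self.batch_size:
        return
    states, actions, next_states, rewards = zip(*self.memory.sample(self.batch_size))
    x = torch.eye(self.state_len)[list(states)]         # one-hot encoded batch
    x1 = torch.eye(self.state_len)[list(next_states)]
    q_predicted = self.nn(x)                             # (batch, n_actions)
    q_target = q_predicted.detach().clone()
    with torch.no_grad():
        q_next = self.target_nn(x1).max(dim=1).values    # bootstrap from target net
    for i, (a, r) in enumerate(zip(actions, rewards)):
        q_target[i, a] = r + self.y * q_next[i]
    loss = self.criterion(q_predicted, q_target)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    self.losses.append(loss.item())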