Example #1
def update_policy(self, s, a, game):
    # Policy hill-climbing step: shift probability mass toward the
    # action that is greedy w.r.t. Q[s], taking it evenly from the
    # other action_num - 1 actions, then renormalize and log the policy.
    delta = self.delta
    if a == np.argmax(self.Q[s]):
        self.pi[s][a] += delta
    else:
        self.pi[s][a] -= delta / (self.action_num - 1)
    StationaryAgent.normalize(self.pi[s])
    self.pi_history.append(deepcopy(self.pi))
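This is the policy hill-climbing (PHC) rule: nudge the taken action's probability up when it is greedy with respect to Q[s], down otherwise, then renormalize. A minimal standalone sketch of the same step, assuming a clip-and-renormalize in place of StationaryAgent.normalize (phc_step and the toy numbers are illustrative, not part of the original class):

import numpy as np

def phc_step(pi_s, q_s, a, delta):
    # Hypothetical standalone helper mirroring the update above.
    pi_s = pi_s.copy()
    if a == int(np.argmax(q_s)):
        pi_s[a] += delta                        # move mass toward the greedy action
    else:
        pi_s[a] -= delta / (len(pi_s) - 1)      # take mass from a non-greedy action
    pi_s = np.clip(pi_s, 0.0, None)             # stand-in for StationaryAgent.normalize
    return pi_s / pi_s.sum()

pi = np.array([0.25, 0.25, 0.25, 0.25])
q = np.array([0.1, 0.9, 0.3, 0.2])
print(phc_step(pi, q, a=1, delta=0.1))  # probability of action 1 increases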
Example #2
def update_policy(self, s, a, env):
    # Solve for the policy at state s via linear programming, trying
    # each configured solver in turn and keeping the first that works.
    self.initialize_solvers()
    for solver, lib in self.solvers:
        try:
            self.pi[s] = self.lp_solve(self.Q[s], solver, lib)
            StationaryAgent.normalize(self.pi[s])
            self.pi_history.append(deepcopy(self.pi))
        except Exception as e:
            print('optimization using {} failed: {}'.format(solver, e))
            continue  # fall through to the next solver
        else:
            break  # success: stop trying further solvers
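The loop relies on Python's for/try/except/else: continue moves on to the next solver after a failure, while the else clause runs only when no exception was raised, so break keeps the first successful solution. A condensed sketch of the same fallback pattern (first_successful and the toy solvers are hypothetical, not from the original code):

def first_successful(solvers, problem):
    # Return the result of the first solver that does not raise.
    for name, solve in solvers:
        try:
            result = solve(problem)
        except Exception as e:
            print('optimization using {} failed: {}'.format(name, e))
            continue
        else:
            return result
    raise RuntimeError('all solvers failed')

solvers = [
    ('broken', lambda p: 1 / 0),
    ('working', lambda p: sum(p)),
]
print(first_successful(solvers, [1, 2, 3]))  # prints the failure, then 6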
Example #3
def update_policy(self, s, a, game):
    # Mix the current policy with a target vector vi: the vertex of the
    # taken action (rate delta1) when that action is greedy w.r.t. Q[s],
    # otherwise the zero vector (rate delta2). With vi = 0 the uniform
    # shrinkage is undone by the normalization below.
    if a == np.argmax(self.Q[s]):
        delta = self.delta1
        vi = np.zeros(self.action_num)
        vi[a] = 1.
    else:
        delta = self.delta2
        vi = np.zeros(self.action_num)
        vi[a] = 0.  # explicit but redundant: vi is already all zeros

    self.pi[s] = (1 - delta) * self.pi[s] + delta * vi
    StationaryAgent.normalize(self.pi[s])
    self.pi_history.append(deepcopy(self.pi))
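Both branches reduce to the same convex-combination step, pi[s] = (1 - delta) * pi[s] + delta * vi; note that with vi = 0 the result renormalizes back to the original policy. A standalone sketch of the step (mix_toward is a hypothetical name):

import numpy as np

def mix_toward(pi_s, target, delta):
    # Convex combination of the current policy and a target vector,
    # followed by renormalization.
    new_pi = (1 - delta) * pi_s + delta * target
    return new_pi / new_pi.sum()

pi = np.array([0.5, 0.3, 0.2])
vertex = np.array([1.0, 0.0, 0.0])
print(mix_toward(pi, vertex, 0.2))       # mass shifts toward action 0
print(mix_toward(pi, np.zeros(3), 0.2))  # unchanged after renormalization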
Example #4
def update_policy(self, s, a, game):
    # Gradient ascent on the policy: compute each action's advantage
    # delta_hat_A over the state value V, damp it by
    # gamma * |delta_hat_A| * pi[s] (a PGA-APP-style correction), then
    # step by eta and renormalize.
    V = np.dot(self.pi[s], self.Q[s])
    delta_hat_A = np.zeros(self.action_num)
    delta_A = np.zeros(self.action_num)
    for ai in range(self.action_num):
        if self.pi[s][ai] == 1:
            delta_hat_A[ai] = self.Q[s][ai] - V
        else:
            delta_hat_A[ai] = (self.Q[s][ai] - V) / (1 - self.pi[s][ai])
        delta_A[ai] = delta_hat_A[ai] - self.gamma * abs(delta_hat_A[ai]) * self.pi[s][ai]
    self.pi[s] += self.eta * delta_A
    StationaryAgent.normalize(self.pi[s])
    self.pi_history.append(deepcopy(self.pi))
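The per-action loop can be written without explicit iteration. A vectorized sketch of the same step, with gradient_step and the clip-and-renormalize standing in for StationaryAgent.normalize as assumptions:

import numpy as np

def gradient_step(pi_s, q_s, eta, gamma):
    # Hypothetical vectorized version of the loop above.
    v = pi_s @ q_s
    denom = np.where(pi_s == 1.0, 1.0, 1.0 - pi_s)  # avoid division by zero
    adv = (q_s - v) / denom                         # delta_hat_A
    grad = adv - gamma * np.abs(adv) * pi_s         # delta_A
    new_pi = np.clip(pi_s + eta * grad, 0.0, None)
    return new_pi / new_pi.sum()

pi = np.array([0.25, 0.25, 0.25, 0.25])
q = np.array([0.1, 0.9, 0.3, 0.2])
print(gradient_step(pi, q, eta=0.05, gamma=0.9))  # mass shifts toward action 1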
Example #5
def act(self, s, exploration, game):
    # Epsilon-greedy action selection: explore uniformly at random with
    # probability self.episilon (the attribute is spelled this way in
    # the codebase), otherwise sample from the learned policy pi[s].
    if exploration and random.random() < self.episilon:
        return random.randint(0, self.action_num - 1)
    else:
        if self.verbose:
            # Iterate with a distinct variable so the state argument s
            # is not shadowed before the sampling call below.
            for state in self.Q.keys():
                print('{}--------------'.format(self.id_))
                print('Q of agent {}: state {}: {}'.format(
                    self.id_, state, str(self.Q[state])))
                print('pi of agent {}: state {}: {}'.format(
                    self.id_, state, self.pi[state]))
                print('{}--------------'.format(self.id_))
        return StationaryAgent.sample(self.pi[s])
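StationaryAgent.sample is not shown in these snippets; assuming it draws an action index from the distribution pi[s], the selection logic reduces to standard epsilon-greedy sampling. A minimal sketch (epsilon_greedy and the numpy-based draw are assumptions, not the original API):

import random
import numpy as np

def epsilon_greedy(pi_s, epsilon, exploration=True):
    # Uniform random action with probability epsilon, otherwise a draw
    # from the policy distribution pi_s.
    if exploration and random.random() < epsilon:
        return random.randint(0, len(pi_s) - 1)
    return int(np.random.choice(len(pi_s), p=pi_s))

pi = np.array([0.1, 0.7, 0.2])
print(epsilon_greedy(pi, epsilon=0.1))  # usually 1, the most probable action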