# Checkpoint-driven inference utilities from tensorpack.
from tensorpack import OfflinePredictor, PredictConfig, SaverRestore

# Agent, Model and Predictor are this project's own classes; the module
# path here is an assumption, not shown in the original snippet.
from model import Agent, Model, Predictor


class CDQNAgent(Agent):
    def __init__(self, role_id, weight_path):
        def role2agent(role):
            # Map the environment's role id onto the scope name the
            # checkpoint was trained under (role 2 -> agent1,
            # role 1 -> agent3, anything else -> agent2).
            if role == 2:
                return 'agent1'
            elif role == 1:
                return 'agent3'
            else:
                return 'agent2'

        super().__init__(role_id)
        agent_names = ['agent%d' % i for i in range(1, 4)]
        # Double DQN over a (1000, 21) two-stage action space, with an
        # 888-dimensional feature vector per cell and discount 0.99.
        model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double',
                      (1000, 21), 0.99)
        prefix = role2agent(role_id)
        self.predictor = Predictor(OfflinePredictor(
            PredictConfig(model=model,
                          session_init=SaverRestore(weight_path),
                          input_names=[
                              prefix + '/state',
                              prefix + '_comb_mask',
                              prefix + '/fine_mask'
                          ],
                          output_names=[prefix + '/Qvalue'])),
                                   num_actions=(1000, 21))

    def intention(self, env):
        # Gather the three observation tensors from the environment and
        # let the restored network pick the move with the highest Q-value.
        handcards = env.get_curr_handcards()
        last_two_cards = env.get_last_two_cards()
        prob_state = env.get_state_prob()
        return self.predictor.predict(handcards, last_two_cards, prob_state)
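
A minimal usage sketch for the class above (the checkpoint path and the done()/step() driver calls are hypothetical; the real environment only needs the three accessors used in intention()):

agent = CDQNAgent(role_id=2, weight_path='./checkpoints/model')  # path is illustrative
while not env.done():            # hypothetical termination check
    move = agent.intention(env)  # move with the highest predicted Q-value
    env.step(move)               # hypothetical step API
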
Example #2
def _setup_graph(self):
    # self.lord_win_rate = tf.get_variable('lord_win_rate', shape=[],
    #     initializer=tf.constant_initializer(0.), trainable=False)
    # At most one prediction worker (requires `import multiprocessing`).
    nr_proc = min(multiprocessing.cpu_count() // 2, 1)
    # One Predictor per agent scope, bound to that scope's tensors.
    self.predictors = {
        n: Predictor(self.trainer.get_predictor(
            [n + '/state:0', n + '_comb_mask:0', n + '/fine_mask:0'],
            [n + '/Qvalue:0']))
        for n in self.agent_names
    }
    # Every worker entry shares the same predictor table.
    self.pred_funcs = [self.predictors] * nr_proc
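
Every snippet here derives its tensor names from the agent's scope name. A small sketch making that convention explicit (the helper is illustrative, not part of the project):

def io_names(agent):
    # Inputs: state tensor, a mask over candidate combinations, and a
    # fine-grained mask within a combination; output: the Q-value tensor.
    inputs = [agent + '/state:0', agent + '_comb_mask:0', agent + '/fine_mask:0']
    outputs = [agent + '/Qvalue:0']
    return inputs, outputs

print(io_names('agent1'))
# (['agent1/state:0', 'agent1_comb_mask:0', 'agent1/fine_mask:0'], ['agent1/Qvalue:0'])
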
Example #3
def __init__(self, weight_path):
    super().__init__()
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double',
                  (1000, 21), 0.99)
    # Restore one shared checkpoint, then build a Predictor per agent scope.
    self.predictors = {
        n: Predictor(OfflinePredictor(PredictConfig(
            model=model,
            session_init=SaverRestore(weight_path),
            input_names=[n + '/state', n + '_comb_mask', n + '/fine_mask'],
            output_names=[n + '/Qvalue'])), num_actions=(1000, 21))
        for n in self.get_all_agent_names()
    }
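
With one Predictor per agent name, choosing a move during self-play reduces to a dictionary lookup. A sketch reusing the predict() signature from Example #1 (the free-standing function and its argument names are illustrative):

def act(predictors, acting_agent, handcards, last_two_cards, prob_state):
    # predictors: the dict built in __init__ above, keyed 'agent1'..'agent3'.
    return predictors[acting_agent].predict(handcards, last_two_cards, prob_state)
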
Example #4
def _setup_graph(self):
    # self.lord_win_rate = tf.get_variable('lord_win_rate', shape=[],
    #     initializer=tf.constant_initializer(0.), trainable=False)
    # At most one prediction worker (requires `import multiprocessing`).
    nr_proc = min(multiprocessing.cpu_count() // 2, 1)
    # Here the tensor names come from attributes instead of being rebuilt.
    self.pred_funcs = [
        Predictor(
            self.trainer.get_predictor(self.input_names,
                                       self.output_names))
    ] * nr_proc

def _setup_graph(self):
    # Predictor for the agent currently being trained...
    self.curr_predictor = self.trainer.get_predictor([
        self.agent_name + '/state:0', self.agent_name + '_comb_mask:0',
        self.agent_name + '/fine_mask:0'
    ], [self.agent_name + '/Qvalue:0'])
    # ...plus one Predictor per player for self-play rollouts.
    self.predictors = {
        n: Predictor(
            self.trainer.get_predictor(
                [n + '/state:0', n + '_comb_mask:0', n + '/fine_mask:0'],
                [n + '/Qvalue:0']))
        for n in self.player.get_all_agent_names()
    }
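
All the examples hard-code the same shapes. Naming them shows the shared contract; the constant names and the interpretations in the comments are my reading, only the values come from the code:

MAX_COMBS = 1000                 # first action dimension (candidate combinations)
MAX_FINE = 21                    # second, fine-grained action dimension
STATE_DIM = 256 + 256 * 2 + 120  # = 888 features per action cell

model = Model(agent_names, (MAX_COMBS, MAX_FINE, STATE_DIM), 'Double',
              (MAX_COMBS, MAX_FINE), 0.99)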