class CDQNAgent(Agent):
    """Doudizhu agent backed by a pre-trained CDQN network.

    Restores the checkpoint at ``weight_path`` and queries the Q-value head
    of the sub-graph corresponding to ``role_id``.
    """

    def __init__(self, role_id, weight_path):
        super().__init__(role_id)
        # Role ids do not line up 1:1 with the scope names used when the
        # three per-position sub-graphs were built; translate first.
        scope = {2: 'agent1', 1: 'agent3'}.get(role_id, 'agent2')
        agent_names = ['agent%d' % i for i in range(1, 4)]
        model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double',
                      (1000, 21), 0.99)
        config = PredictConfig(
            model=model,
            session_init=SaverRestore(weight_path),
            input_names=[scope + '/state',
                         scope + '_comb_mask',
                         scope + '/fine_mask'],
            output_names=[scope + '/Qvalue'])
        self.predictor = Predictor(OfflinePredictor(config),
                                   num_actions=(1000, 21))

    def intention(self, env):
        """Return the network's chosen action for the current env state."""
        handcards = env.get_curr_handcards()
        last_two_cards = env.get_last_two_cards()
        prob_state = env.get_state_prob()
        return self.predictor.predict(handcards, last_two_cards, prob_state)
def __init__(self, role_id, weight_path):
    """Build an offline predictor for the sub-graph matching ``role_id``.

    Args:
        role_id: position of this player in the game.
        weight_path: checkpoint file the TF session is restored from.
    """
    super().__init__(role_id)
    # Role ids map onto training-time scope names non-trivially:
    # 2 -> agent1, 1 -> agent3, anything else -> agent2.
    scope = {2: 'agent1', 1: 'agent3'}.get(role_id, 'agent2')
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double',
                  (1000, 21), 0.99)
    config = PredictConfig(
        model=model,
        session_init=SaverRestore(weight_path),
        input_names=[scope + '/state',
                     scope + '_comb_mask',
                     scope + '/fine_mask'],
        output_names=[scope + '/Qvalue'])
    self.predictor = Predictor(OfflinePredictor(config),
                               num_actions=(1000, 21))
def _setup_graph(self):
    """Create one wrapped predictor per agent once the graph is built.

    Populates ``self.predictors`` (scope name -> Predictor) and
    ``self.pred_funcs`` (one entry per worker, all sharing the same
    predictor mapping).
    """
    nr_proc = min(multiprocessing.cpu_count() // 2, 1)
    # BUG FIX: with `min`, a single-core machine yields nr_proc == 0 and
    # self.pred_funcs ends up empty; clamp to at least one worker.
    # NOTE(review): the cap of 1 looks like a debugging leftover of the
    # usual `min(cpu_count() // 2, <N>)` pattern — confirm before raising.
    nr_proc = max(nr_proc, 1)
    self.predictors = {
        n: Predictor(self.trainer.get_predictor(
            [n + '/state:0', n + '_comb_mask:0', n + '/fine_mask:0'],
            [n + '/Qvalue:0']))
        for n in self.agent_names
    }
    # Every worker slot references the same predictor dict.
    self.pred_funcs = [self.predictors] * nr_proc
def __init__(self, weight_path):
    """Restore one offline predictor per agent from ``weight_path``."""
    super().__init__()
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double',
                  (1000, 21), 0.99)

    def build(name):
        # One predictor per agent scope, all restored from the same
        # checkpoint; tensor names follow the scope-name convention.
        config = PredictConfig(
            model=model,
            session_init=SaverRestore(weight_path),
            input_names=[name + '/state',
                         name + '_comb_mask',
                         name + '/fine_mask'],
            output_names=[name + '/Qvalue'])
        return Predictor(OfflinePredictor(config), num_actions=(1000, 21))

    self.predictors = {n: build(n) for n in self.get_all_agent_names()}
def _setup_graph(self):
    """Wrap the trainer's predictor for use by the worker processes."""
    nr_proc = min(multiprocessing.cpu_count() // 2, 1)
    # BUG FIX: with `min`, a single-core machine yields nr_proc == 0 and
    # self.pred_funcs ends up empty; clamp to at least one worker.
    # NOTE(review): the cap of 1 looks like a debugging leftover of the
    # usual `min(cpu_count() // 2, <N>)` pattern — confirm before raising.
    nr_proc = max(nr_proc, 1)
    # A single shared Predictor instance, replicated once per worker slot.
    predictor = Predictor(
        self.trainer.get_predictor(self.input_names, self.output_names))
    self.pred_funcs = [predictor] * nr_proc
def _setup_graph(self):
    """Fetch predictors from the trainer: one for this agent, one per player."""
    name = self.agent_name
    # Predictor bound to this agent's own sub-graph.
    self.curr_predictor = self.trainer.get_predictor(
        [name + '/state:0', name + '_comb_mask:0', name + '/fine_mask:0'],
        [name + '/Qvalue:0'])
    # A wrapped predictor for every player, keyed by agent scope name.
    self.predictors = {}
    for n in self.player.get_all_agent_names():
        raw = self.trainer.get_predictor(
            [n + '/state:0', n + '_comb_mask:0', n + '/fine_mask:0'],
            [n + '/Qvalue:0'])
        self.predictors[n] = Predictor(raw)