def get_mask(self):
    """Return the action mask matching the current ``self.act`` / ``self.mode``.

    Reads ``self.act``, ``self.mode`` and ``self.curr_handcards_char``, plus,
    depending on the mode, ``self.last_cards_value``, ``self.category``,
    ``self.minor_type``, ``self.intention``, ``self.active_decision`` and
    ``self.active_response``.

    Returns:
        The mask selected by the mode, or ``None`` when the act / mode
        combination is not one of the handled ones.
    """
    act = self.act
    if act != ACT_TYPE.PASSIVE and act != ACT_TYPE.ACTIVE:
        return None

    mode = self.mode

    # The minor-card mask is built the same way for both acts and does not
    # need the get_mask_alter() result, so handle it first.
    if mode == MODE.MINOR_RESPONSE:
        input_single, input_pair, _, _ = get_masks(
            self.curr_handcards_char, None)
        if self.minor_type == 1:
            # Pair mask is padded with two zero slots so it lines up with the
            # single-card mask (presumably the two jokers -- TODO confirm).
            mask = np.append(input_pair, [0, 0])
        else:
            mask = input_single
        # Cards already part of the intention may not be picked again;
        # values are 3-based, mask indices are 0-based.
        for v in set(self.intention):
            mask[v - 3] = 0
        return mask

    if act == ACT_TYPE.PASSIVE:
        decision_mask, response_mask, bomb_mask, _ = get_mask_alter(
            self.curr_handcards_char,
            to_char(self.last_cards_value),
            self.category)
        if mode == MODE.PASSIVE_DECISION:
            return decision_mask
        if mode == MODE.PASSIVE_RESPONSE:
            return response_mask
        if mode == MODE.PASSIVE_BOMB:
            return bomb_mask
        return None

    # act == ACT_TYPE.ACTIVE
    decision_mask, response_mask, _, length_mask = get_mask_alter(
        self.curr_handcards_char, [], self.category)
    if mode == MODE.ACTIVE_DECISION:
        return decision_mask
    if mode == MODE.ACTIVE_RESPONSE:
        return response_mask[self.active_decision]
    if mode == MODE.ACTIVE_SEQ:
        return length_mask[self.active_decision][self.active_response]
    return None
def play_one_episode(env, func):
    """Play one episode, choosing the moves of role id 2 with ``func``.

    Role id 2 is stepped manually from the outputs of ``func`` (the original
    debug output labels this role "lord"); every other role is stepped with
    ``env.step_auto()``.

    Args:
        env: Environment exposing ``reset``, ``prepare_manual``,
            ``get_last_outcards``, ``get_last_outcategory_idx``,
            ``get_curr_handcards``, ``get_role_ID``, ``step_manual`` and
            ``step_auto``.
        func: Callable taking ``[role_id, state, last_state, minor_type]``
            and returning seven outputs, unpacked here as: passive decision,
            passive bomb, passive response, active decision, active
            response, active sequence length, minor response.

    Returns:
        ``int(r > 0)`` for the final non-zero reward ``r``. The original
        commented-out debug output suggests ``r > 0`` means the farmers won
        -- confirm against the environment.
    """
    # Categories whose main cards form a sequence and need a length.
    line_categories = (
        Category.SINGLE_LINE.value,
        Category.DOUBLE_LINE.value,
        Category.TRIPLE_LINE.value,
        Category.THREE_ONE_LINE.value,
        Category.THREE_TWO_LINE.value,
    )
    # Categories that carry minor (kicker) cards besides the main cards.
    minor_categories = (
        Category.THREE_ONE.value,
        Category.THREE_TWO.value,
        Category.THREE_ONE_LINE.value,
        Category.THREE_TWO_LINE.value,
        Category.FOUR_TWO.value,
    )

    def take_action_from_prob(prob, mask):
        """Return the argmax of ``prob[0]`` restricted to non-zero ``mask``."""
        # Work on a copy so the network output is not modified in place.
        scores = np.array(prob[0])
        # -1 instead of a tiny probability, to avoid numeric difficulty.
        scores[np.asarray(mask) == 0] = -1
        return np.argmax(scores)

    def main_cards_dup_mask(intention, seq_length):
        """Return a 15-slot mask that is 0 on the ranks of the main cards."""
        dup_mask = np.ones([15])
        first = intention[0] - 3  # 3-based card value -> 0-based index
        span = seq_length if seq_length else 1
        dup_mask[first:first + span] = 0
        return dup_mask

    def pick_minor_cards(role_id, category, handcards, seq_length, dup_mask,
                         main_cards_char):
        """Return the chosen minor cards as chars (``None`` if no category matches).

        ``handcards`` and ``dup_mask`` are modified in place; callers pass
        copies / fresh arrays.
        """
        minor_specs = (
            (Category.THREE_ONE.value, 1, False),
            (Category.THREE_TWO.value, 1, True),
            (Category.THREE_ONE_LINE.value, seq_length, False),
            (Category.THREE_TWO_LINE.value, seq_length, True),
            (Category.FOUR_TWO.value, 2, False),
        )
        for value, num, is_pair in minor_specs:
            if category == value:
                break
        else:
            return None

        # The main cards are already committed; take them out of the hand.
        for main_card in main_cards_char:
            handcards.remove(main_card)

        minor_type = 1 if is_pair else 0
        copies = 2 if is_pair else 1
        outputs = []
        for _step in range(num):
            # State is rebuilt from the remaining hand before every pick.
            state = get_mask(handcards, action_space, None).astype(np.float32)
            input_single, input_pair, _, _ = get_masks(handcards, None)
            _, _, _, _, _, _, minor_response_prob = func(
                [np.array([role_id]), state.reshape(1, -1),
                 np.zeros([1, 9085]), np.array([minor_type])]
            )

            if is_pair:
                mask = np.concatenate([input_pair, [0, 0]]) * dup_mask
            else:
                mask = input_single * dup_mask

            minor_response = take_action_from_prob(minor_response_prob, mask)
            # The same rank may not be chosen twice.
            dup_mask[minor_response] = 0

            # Convert the 0-based network output to a 3-based char card.
            card = to_char(minor_response + 3)
            for _copy in range(copies):
                handcards.remove(card)
            outputs.extend([card] * copies)
        return outputs

    def with_minor_cards(role_id, category_idx, intention, seq_length,
                         curr_cards_char):
        """Append the inferred minor cards (as values) to ``intention``."""
        dup_mask = main_cards_dup_mask(intention, seq_length)
        minor_chars = pick_minor_cards(
            role_id, category_idx, curr_cards_char.copy(), seq_length,
            dup_mask, to_char(intention))
        return np.concatenate([intention, to_value(minor_chars)])

    def active_move(role_id, s, curr_cards_char, last_cards_value,
                    last_category_idx):
        """Choose the cards to lead with when there is nothing to beat."""
        decision_mask, response_mask, _, length_mask = get_mask_alter(
            curr_cards_char, [], last_category_idx)

        # NOTE(review): the fourth input has length s.shape[0], unlike the
        # length-1 minor_type used for minor responses -- kept as-is.
        _, _, _, decision_prob, response_prob, seq_prob, _ = func(
            [np.array([role_id]), s.reshape(1, -1),
             np.zeros([1, 9085]), np.zeros([s.shape[0]])]
        )

        decision = take_action_from_prob(decision_prob, decision_mask)
        category_idx = decision + 1
        response = take_action_from_prob(response_prob,
                                         response_mask[decision])

        seq_length = 0
        if category_idx in line_categories:
            seq_length = take_action_from_prob(
                seq_prob, length_mask[decision][response]) + 1

        # Main cards first, minor cards afterwards.
        intention = give_cards_without_minor(
            response, last_cards_value, category_idx, seq_length)
        if category_idx in minor_categories:
            intention = with_minor_cards(
                role_id, category_idx, intention, seq_length, curr_cards_char)
        return intention

    def passive_move(role_id, s, curr_cards_char, last_cards_char,
                     last_cards_value, last_category_idx):
        """Choose how to answer the last played cards (``None`` if undecided)."""
        last_state = get_mask(
            last_cards_char, action_space, None).astype(np.float32)
        decision_mask, response_mask, bomb_mask, _ = get_mask_alter(
            curr_cards_char, to_char(last_cards_value), last_category_idx)
        decision_prob, bomb_prob, response_prob, _, _, _, _ = func(
            [np.array([role_id]), s.reshape(1, -1),
             last_state.reshape(1, -1), np.zeros([s.shape[0]])])
        decision = take_action_from_prob(decision_prob, decision_mask)

        if decision == 0:
            return np.array([])
        if decision == 1:
            bomb = take_action_from_prob(bomb_prob, bomb_mask)
            # Converting 0-based index to 3-based value, four of a kind.
            return np.array([bomb + 3] * 4)
        if decision == 2:
            # Values 16 and 17 (presumably the two jokers -- TODO confirm).
            return np.array([16, 17])
        if decision == 3:
            response = take_action_from_prob(response_prob, response_mask)
            intention = give_cards_without_minor(
                response, last_cards_value, last_category_idx, None)
            if last_category_idx in minor_categories:
                seq_length = get_seq_length(last_category_idx,
                                            last_cards_value)
                intention = with_minor_cards(
                    role_id, last_category_idx, intention, seq_length,
                    curr_cards_char)
            return intention
        return None

    env.reset()
    env.prepare_manual(np.arange(21))

    r = 0
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        role_id = env.get_role_ID()

        if role_id != 2:
            _intention, r, _ = env.step_auto()
            continue

        # An empty last-card group means this player leads.
        is_active = last_cards_value.size == 0
        s = get_mask(
            curr_cards_char, action_space,
            None if is_active else last_cards_char).astype(np.float32)

        if is_active:
            intention = active_move(
                role_id, s, curr_cards_char, last_cards_value,
                last_category_idx)
        else:
            intention = passive_move(
                role_id, s, curr_cards_char, last_cards_char,
                last_cards_value, last_category_idx)

        # Check before stepping so an undecided move never reaches the env.
        assert intention is not None, "no intention produced for manual step"
        # step_manual is only given the cards; the card type is not fed
        # explicitly.
        r, _, _ = env.step_manual(intention)

    return int(r > 0)