def calculate_p_hj_gt_hi_accurate(cards, n_j, h_i, verbose=0):
    """Return the fraction of ``n_j``-card hands from ``cards`` worth more than ``h_i``.

    Every ``n_j``-sized combination of ``cards`` is enumerated and its value
    (via ``cards_to_value_sum``) is compared against ``h_i``.

    Args:
        cards: pool of cards the hands are drawn from.
        n_j: number of cards per enumerated hand.
        h_i: value each hand's sum is compared against (strictly greater).
        verbose: any truthy value prints the totals; values above 1 also
            print every enumerated hand with its comparison result.

    Returns:
        ``1`` when ``cards`` is empty, otherwise ``successes / total``.
    """
    if len(cards) == 0:
        return 1

    outcomes = []
    for candidate_hand in combinations(cards, n_j):
        exceeds = cards_to_value_sum(candidate_hand) > h_i
        if verbose > 1:
            print(candidate_hand, exceeds)
        outcomes.append(exceeds)

    if not outcomes:
        # combinations() yields nothing when len(cards) < n_j; fall back to
        # evaluating the whole pool as the single possible hand.
        outcomes.append(cards_to_value_sum(cards) > h_i)

    n_total = len(outcomes)
    n_success = np.sum(outcomes)

    if verbose:
        print(f'total: {n_total}\nsuccesses: {n_success}')

    return n_success / n_total
def step(env, name, action):
    """Apply one player's action to the environment and set up the opponent's turn.

    Args:
        env: environment exposing ``players`` (indexed by player name, each
            with ``cards_in_hand``) and ``round_`` (the round in progress).
        name: name of the player performing the action.
        action: indexable triple ``(declare_yaniv, cards_to_throw, pick_from_deck)``.

    Returns:
        ``(opponent_name, observables, reward, done, info)`` where

        * ``observables`` is ``(n_deck, top_accessible_cards, cards_in_hand,
          n_cards_in_hand_after)``; ``cards_in_hand`` is the *opponent's* hand
          and ``n_cards_in_hand_after`` is the acting player's card count
          after the turn (``None`` when Yaniv was declared),
        * ``reward`` is the Yaniv penalty (if any) plus the throw/pick reward,
        * ``done`` is ``True`` only when Yaniv was declared,
        * ``info['yaniv_declare_correct']`` is ``True``/``False`` for a
          declaration and ``None`` otherwise.
    """
    info = {'yaniv_declare_correct': None}

    declare_yaniv = action[0]
    cards_to_throw = action[1]
    pick_from_deck = action[2]

    # Snapshot the size and value of the hand *before* acting. The hand object
    # itself is not kept: if the round mutates it in place, a retained
    # reference would silently reflect the post-turn hand.
    cards_in_hand_before = env.players[name].cards_in_hand
    n_cards_in_hand_before = len(cards_in_hand_before)
    hand_sum_before = cards_to_value_sum(cards_in_hand_before)

    reward_yaniv, reward_throw_pick = 0, 0
    done = False

    if declare_yaniv:
        done = True
        if hand_sum_before <= YANIV_LIMIT:
            env.round_.round_summary(name)
            info['yaniv_declare_correct'] = True
        else:
            reward_yaniv = REWARD_FACTOR_YANIV_INCORRECT_PENALTY
            info['yaniv_declare_correct'] = False
        n_cards_in_hand_after = None
    else:
        env.round_.throw_cards_to_pile(name, cards_to_throw=cards_to_throw)
        env.round_.pull_card(name, pick_from_deck=pick_from_deck)
        env.round_.pile_top_cards = env.round_.pile_top_cards_this_turn
        env.round_.update_players_knowledge(name)

        cards_in_hand_after = env.players[name].cards_in_hand
        n_cards_in_hand_after = len(cards_in_hand_after)

        # Use the pre-turn snapshot rather than re-summing the "before" list,
        # which may alias the hand that was just modified.
        reward_throw_pick_points = hand_sum_before - cards_to_value_sum(cards_in_hand_after)
        reward_throw_pick_points *= REWARD_FACTOR_TURN_POINT_DIFFERENCE

        reward_throw_pick_n_cards = n_cards_in_hand_before - n_cards_in_hand_after
        reward_throw_pick_n_cards *= REWARD_FACTOR_TURN_N_CARDS

        reward_throw_pick = reward_throw_pick_points + reward_throw_pick_n_cards

    reward = reward_yaniv + reward_throw_pick

    # --- setting up opponent ---
    opponent_name = _name_to_opponent_name_two_players(name, env.players)
    n_deck = len(env.round_.round_deck)
    top_accessible_cards = pile_top_accessible_cards(env.round_.pile_top_cards)
    cards_in_hand = env.players[opponent_name].cards_in_hand

    observables = n_deck, top_accessible_cards, cards_in_hand, n_cards_in_hand_after

    return opponent_name, observables, reward, done, info
def round_output_to_yaniv_probabilities_two_player(round_output, pov_name=None, verbose=0):
    """Map turn numbers of a two-player round to the POV player's Yaniv probability.

    Walks the recorded turns, tracking which cards are still unknown to the
    point-of-view (POV) player and how many cards the opponent holds, and
    evaluates ``calculate_p_hj_gt_hi_n_j_prior`` on the POV player's turns.

    Args:
        round_output: round record indexed by ``'start'`` and by turn number
            (1-based). Entries read here: ``'deck_ordered'`` and
            ``'<player>_cards'`` under ``'start'``; ``'name'``, ``'throws'``,
            ``'<player>_cards'``, ``'<player>_ncards'``, ``'pile_top_accessible'``
            and optionally ``'yaniv_call'`` under each turn.
        pov_name: player whose point of view is taken; defaults to the player
            of turn 1.
        verbose: forwarded to ``calculate_p_hj_gt_hi_n_j_prior``.

    Returns:
        Dict ``{turn: probability}`` holding an entry for each POV turn on
        which the hand sum is at most 7, and for a turn on which the POV
        player called Yaniv.

    Raises:
        IndexError: if no player other than ``pov_name`` is found.
    """
    player_names = round_output_to_player_names(round_output)
    if pov_name is None:
        pov_name = round_output[1]['name']

    # Compare whole names. ``set(pov_name)`` would split the name into single
    # characters, so a set difference against it would not exclude the POV
    # player for any multi-character name.
    opponent_name = [player for player in player_names if player != pov_name][0]

    n_turns = round_to_number_of_turns(round_output)

    cards_unknown = round_output['start']['deck_ordered'].copy()
    play_jokers = np.sum(cards_to_number_jokers(cards_unknown) > 0, dtype=bool)
    cards_unknown.remove(round_output[1]['pile_top_accessible'][0])  # pile top card

    n_j = len(round_output['start'][f'{opponent_name}_cards'])  # no. of cards of opponent
    pov_cards_key = f'{pov_name}_cards'

    turn_to_yaniv_probability = {}
    for turn in range(1, n_turns + 1):
        turn_output = round_output[turn]
        is_pov_turn = turn_output['name'] == pov_name

        if 'yaniv_call' in turn_output:
            # On a Yaniv call only the POV player's own call is evaluated.
            if is_pov_turn:
                h_i = cards_to_value_sum(turn_output[pov_cards_key])
                turn_to_yaniv_probability[turn] = calculate_p_hj_gt_hi_n_j_prior(
                    n_j, cards_unknown, h_i=h_i, play_jokers=play_jokers, verbose=verbose)
            continue

        if is_pov_turn:
            # The POV player's own cards are no longer unknown.
            for card in turn_output[pov_cards_key]:
                if card in cards_unknown:
                    cards_unknown.remove(card)

            h_i = cards_to_value_sum(turn_output[pov_cards_key])
            # NOTE(review): 7 presumably mirrors the Yaniv declaration limit -
            # confirm against the module-level constant.
            if h_i <= 7:
                turn_to_yaniv_probability[turn] = calculate_p_hj_gt_hi_n_j_prior(
                    n_j, cards_unknown, h_i=h_i, play_jokers=play_jokers, verbose=verbose)
        # TODO: elif turn_output['pull_source'] == 'pile' -- this information
        # still needs to be added to the round output.
        else:
            # Opponent's hand size after this turn: throws leave, one card is pulled.
            n_j = turn_output[f'{opponent_name}_ncards'] - len(turn_output['throws']) + 1

        # Cards thrown this turn leave the unknown pool; the membership guard
        # makes this a no-op for cards already removed above.
        for card in turn_output['throws']:
            if card in cards_unknown:
                cards_unknown.remove(card)

    return turn_to_yaniv_probability
def basic_policy(observables, turn_number, seed=None, yaniv_thresh=None,
                 throw_out='highest_combination', pickup='random', deck_prob=0.5):
    """Choose an action with a simple rule-based policy.

    Args:
        observables: indexable ``(n_deck, top_accessible_cards, cards_in_hand,
            n_cards_opponent)``; only the pile cards (index 1) and the hand
            (index 2) are used.
        turn_number: accepted for interface compatibility; not used.
        seed: passed to ``np.random.seed`` on every call (``None`` reseeds
            NumPy's global generator unpredictably).
        yaniv_thresh: declare Yaniv when the hand sum is at or below this
            value; defaults to ``YANIV_LIMIT``.
        throw_out: ``'highest_card'``, ``'highest_combination'`` or
            ``'random_card'``.
        pickup: ``'random'`` to pick from the deck with probability
            ``deck_prob``, or an integer 0-10: take from the pile when the
            lowest accessible pile card is worth at most this much.
        deck_prob: probability of picking from the deck when ``pickup`` is
            ``'random'``.

    Returns:
        Tuple ``(yaniv_call, cards_to_throw, pick_from_deck)``.
    """
    assert throw_out in ['highest_card', 'highest_combination', 'random_card']
    assert pickup in ['random', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    if yaniv_thresh is None:
        yaniv_thresh = YANIV_LIMIT

    top_accessible_cards = observables[1]
    cards_in_hand = observables[2]

    # Always call Yaniv when allowed; the throw/pick choice below is still
    # produced in case the call turns out to be wrong.
    hand_sum = cards_to_value_sum(cards_in_hand)
    yaniv_call = 1 if hand_sum <= yaniv_thresh else 0

    cards_in_hand_sorted = sort_cards(cards_in_hand, descending=True)

    np.random.seed(seed)

    if throw_out == 'highest_card':
        cards_to_throw = [cards_in_hand_sorted[0]]
    elif throw_out == 'highest_combination':
        # Must compare against ``throw_out``: a bare string literal is always
        # truthy and would make the 'random_card' branch unreachable.
        cards_to_throw = cards_to_best_combination(cards_in_hand_sorted)
    elif throw_out == 'random_card':
        cards_to_throw = [np.random.choice(cards_in_hand_sorted)]

    if pickup == 'random':
        pick_from_deck = np.random.binomial(1, deck_prob)
    else:
        lowest_pile_value = min(card_to_value(card) for card in top_accessible_cards)
        pick_from_deck = 0 if pickup >= lowest_pile_value else 1

    return (yaniv_call, cards_to_throw, pick_from_deck)
def test4(self):
    # A joker held together with the 4 of spades is expected to total 4.
    joker = f'{JOKER_RANK}{JOKER_SUITE1}'
    hand = [joker, '4s']
    self.assertEqual(cards_to_value_sum(hand), 4)
def test3(self):
    # Jack, queen, king and ace together are expected to total 31.
    hand = ['Jh', 'Qs', 'Kc', 'Ad']
    self.assertEqual(cards_to_value_sum(hand), 31)
def test2(self):
    # A jack plus the 4 of spades is expected to total 14.
    hand = ['Jh', '4s']
    self.assertEqual(cards_to_value_sum(hand), 14)
def test(self):
    # Two number cards (5 and 4) are expected to total 9.
    hand = ['5h', '4s']
    self.assertEqual(cards_to_value_sum(hand), 9)