def test_play(self):
    """Check that Baraja.play() awards the expected Prize for known decks."""
    deck = Baraja()
    # Deck stacked with four twos: action 28 should yield four of a kind.
    deck.start_with(list(map(Naipe, ['2S', '2D', '3S', '2H', '2C'])))
    self.assertEqual(Prize.Poker, deck.play(28))
    # Threes over twos: action 31 should yield a full house.
    deck.start_with(list(map(Naipe, ['2S', '2D', '3S', '3H', '2C'])))
    self.assertEqual(Prize.FullHouse, deck.play(31))
    # Fresh deck; with rand_sampling disabled the draw is deterministic,
    # so the pair of fours must become two pair.
    deck = Baraja()
    deck.start_with(map(Naipe, ['4S', '2C', '8S', 'QC', '4D']))
    self.assertEqual(Prize.TwoPair, deck.play(18, rand_sampling=False))
def play_game(sample_size):
    """Shuffle a fresh deck, play its approximate best move, return the payout.

    Parameters
    ----------
    sample_size : int
        Number of Monte Carlo samples passed to ``approx_best_move``.

    Returns
    -------
    int
        Numeric value of the Prize obtained for the chosen action.
    """
    bb = Baraja()
    bb.revolver()  # NOTE(review): presumably shuffles the deck — confirm in Baraja
    A = bb.approx_best_move(sample_size=sample_size)
    prize = bb.play(A[0]).value
    # Fix: pass lazy %-args instead of eagerly formatting with '%' so the
    # message is only built when DEBUG logging is actually enabled.
    logging.debug('%s -- %s -- %s', bb.preview(), act.actions[A[0]], prize)
    return prize
import sys

from scipy.stats import chi2

# %load_ext autoreload
# %autoreload 2
import actions as act
from collections import Counter

from baraja import Naipe, Mano, Baraja, Prize

# #%run ~/code/EfedEquis/Efedequis/poquer/baraja/baraja

# %%time
# Cell: estimate the prize distribution for action 13 on a fixed hand.
b = Baraja()
b.start_with(map(Naipe, ['6D', 'QH', '7H', '7S', '10C']))
#b.evaluate(13, sample_size=5000000)
prize_counter = Counter()
for _ in range(1000):
    # Same effect as prize_counter.update([...]): tally one play outcome.
    prize_counter[b.play(13)] += 1
print(prize_counter.most_common())
b.approx_best_move(sample_size=300)

# %%time
# Cell: score every one of the 32 discard actions on a second fixed hand.
b = Baraja()
b.start_with(map(Naipe, ['3S', 'QS', 'JD', '7S', '6S']))
res = [(act.actions[i], b.evaluate_eff(i, sample_size=10)) for i in range(32)]
# Display the actions ranked best-first (notebook cell output).
{k: v for k, v in sorted(res, key=lambda item: -item[1])}
best = b.approx_best_move(sample_size=100)
act.actions[best[0]], best[1]
# +
def evaluate_strategy(s, b, **kwargs):
    """Run strategy ``s`` on deck ``b`` and play the action it picks.

    kwargs goes directly to the strategy function.  Returns the numeric
    value of the resulting Prize (rand_sampling disabled for determinism).
    """
    chosen = s(b, **kwargs)
    print(act.actions[chosen])
    return b.play(chosen, rand_sampling=False).value


print(b)
evaluate_strategy(strategy, b, sample_size=1000)
# -

# Cell: deterministic replay of a fixed two-pair hand.
B = Baraja()
B.start_with(map(Naipe, ['4S', '2C', '8S', 'QC', '4D']))
print(B)
B.play(18, rand_sampling=False)

# + jupyter={"outputs_hidden": true}
# %%time
# Cell: evaluate all 32 actions on a royal-flush-draw hand.
b = Baraja()
b.start_with(map(Naipe, ['AS', 'QS', 'QD', 'JS', '10S']))
res = [(act.actions[i], b.evaluate(i, sample_size=100)) for i in range(32)]
{k: v for k, v in sorted(res, key=lambda item: -item[1])}
# -

# Cards kept vs. discarded for action 29 (notebook cell output).
[i for i in range(5) if i not in act.actions[29]], act.actions[29]

# NOTE(review): ``m`` is not defined in this chunk — presumably a Mano
# (hand) from an earlier cell; confirm before running.
for n in iter(m):
    print(Naipe(n).repr_naipe())
# NOTE(review): the original source is garbled here — the ``def`` line and
# loop header of ``batching`` were lost, leaving an orphaned ``return``.
# The function is reconstructed from its call site ``pool.map(batching,
# 4 * [10])`` (each worker builds ``n`` samples).  TODO: confirm against
# the original notebook.
def batching(n):
    """Build ``n`` training pairs: (one-hot deck encoding, best action index)."""
    xbat = []
    ybat = []
    b = Baraja()
    for _ in range(n):
        b.revolver()  # reshuffle before sampling each position
        xbat.append(b.one_hot())
        ybat.append(b.approx_best_move(sample_size=150)[0])
    return xbat, ybat


# Training loop: 5 rounds of (generate data in 4 worker processes, fit model).
for _ in range(5):
    print('started batching ')
    pool = mp.Pool(processes=4)
    ret = pool.map(batching, 4 * [10])
    pool.close()
    # Concatenate the per-worker sample lists.
    lx = reduce(lambda a, b: a + b, [r[0] for r in ret])
    ly = reduce(lambda a, b: a + b, [r[1] for r in ret])
    x_train = np.reshape(np.array(lx).astype(np.float32), [len(lx), 52])
    y_train = np.reshape(np.array(ly).astype(np.float32), [len(ly), 1])
    model.fit(x_train, y_train, epochs=5)

# Evaluate: play num_games hands with the model's argmax action and report
# the average return relative to the 1-credit stake.
bar = Baraja()
num_games = 10
credit = 0
for _ in range(num_games):
    bar.revolver()
    oh = np.array(bar.one_hot(), ndmin=2)
    # NOTE(review): calling ``model.call`` bypasses the Keras ``__call__``
    # machinery; ``model(oh)`` is the conventional form — confirm intent.
    acc = np.argmax(model.call(oh))
    #acc = random.randint(0,31)
    credit += bar.play(acc).value
print("The expected return is: ", credit / num_games - 1)

model_filepath = 'poker.mdl'
print('saving model at: ', model_filepath)
model.save(model_filepath)