def setUpGame(self):
    """Build ``self.game``: add every entry of ``players`` and start the game.

    The game is started with ``players[0]`` and, once started, its
    ``turn_order`` is overwritten with a copy of ``players`` so tests see
    the players in list order.
    """
    self.game = game = Game()
    for name in players:
        game.add_player(name)
    game.start_game(players[0])
    # Replace whatever order start_game() chose with the order of ``players``.
    game.turn_order = list(players)
def show_game(num_players, win=True):
    """Play one scripted game to completion, displaying every step.

    Players ``player_1`` .. ``player_<num_players>`` are added and the game
    is started by the player stored under the first key of ``g._players``.
    On every turn the current player's hand is sorted, its first slot is
    overwritten with a card labelled 'A', the hands are shown and card 'A'
    is played.

    Args:
        num_players: number of players to add to the game.
        win: when true the forced card is ``Card(c, i, 'A')`` for
            ``i = 1..5`` in each colour; when false the number is
            ``6 - i`` instead.

    Returns:
        None. The function returns as soon as ``g._is_game_over()`` is true.
    """
    g = Game()
    # Plain loop: add_player() is called for its side effect, so there is
    # no point collecting the display() results in a throwaway list.
    for i in range(1, num_players + 1):
        display(g.add_player('player_%d' % i))

    # list(dict)[0] works on Python 2 and 3 (dict.keys() is not indexable
    # on Python 3) and still raises IndexError when there are no players.
    p1 = g._players[list(g._players)[0]].name
    g.start_game(p1)

    while not g._is_game_over():
        for c in Game.colors:
            for i in range(1, 6):
                # get current player
                p = g.turn_order[0]

                # card 'A' is always first.
                g._players[p].sort_cards()

                # The fix is in: put the correct (win) or incorrect
                # (lose) card at 'A'.
                if win:
                    g._players[p].hand[0] = Card(c, i, 'A')
                else:
                    g._players[p].hand[0] = Card(c, 6 - i, 'A')

                show_hands(g)
                print('%s playing card A' % p)
                display(g.play_card(p, 'A'))
                if g._is_game_over():
                    return
def show_game(num_players, win=True):
    """Play one scripted game to completion, displaying every step.

    Players ``player_1`` .. ``player_<num_players>`` are added and the game
    is started by the player stored under the first key of ``g._players``.
    On every turn the current player's hand is sorted, its first slot is
    overwritten with a card labelled "A", and card "A" is played.

    Args:
        num_players: number of players to add to the game.
        win: when true the forced card is ``Card(c, i, "A")`` for
            ``i = 1..5`` in each colour; when false the number is
            ``6 - i`` instead.

    Returns:
        None. The function returns as soon as ``g._is_game_over()`` is true.
    """
    g = Game()
    # Plain loop: add_player() is called for its side effect, so there is
    # no point collecting the display() results in a throwaway list.
    for i in range(1, num_players + 1):
        display(g.add_player("player_%d" % i))

    # list(dict)[0] works on Python 2 and 3 (dict.keys() is not indexable
    # on Python 3) and still raises IndexError when there are no players.
    p1 = g._players[list(g._players)[0]].name
    g.start_game(p1)

    while not g._is_game_over():
        for c in ["red", "white", "blue", "green", "yellow"]:
            for i in range(1, 6):
                # get current player
                p = g.turn_order[0]

                # card 'A' is always first.
                g._players[p].sort_cards()

                # The fix is in: put the correct (win) or incorrect
                # (lose) card at 'A'.
                if win:
                    g._players[p].hand[0] = Card(c, i, "A")
                else:
                    g._players[p].hand[0] = Card(c, 6 - i, "A")

                print("%s playing card A" % p)
                display(g.play_card(p, "A"))
                if g._is_game_over():
                    return
def setUpGame(self):
    """Build ``self.game`` with xterm markup, add all players and start it.

    The game and every card in its deck each get their own
    ``xterm_markup()`` instance. The game is started with ``players[0]``
    and its ``turn_order`` is then overwritten with a copy of ``players``.
    """
    self.game = game = Game()
    game.markup = xterm_markup()

    for name in players:
        game.add_player(name)

    # Cards carry their own markup object, separate from the game's.
    for card in game.deck:
        card.markup = xterm_markup()

    game.start_game(players[0])
    # Replace whatever order start_game() chose with the order of ``players``.
    game.turn_order = list(players)
def test_rainbow_display(self):
    """Smoke-test display of rainbow cards with the 'rainbow_10' option.

    Prints a rainbow-coloured string, starts a game with
    ``opts={'rainbow_10': True}``, places three rainbow cards (1/A, 2/B,
    3/C) in the first three hand slots of the player whose turn it is,
    then plays 'A', 'A' and 'B' on successive turns. There are no
    assertions; the test passes if nothing raises.
    """
    # NOTE(review): this player and hand are built but never used again;
    # ``p`` is rebound by the loop over ``players`` below. Kept so the
    # Player/Card constructors are still exercised.
    p = Player(players[0])
    p.hand = [Card('rainbow', i, b)
              for i, b in zip(range(1, 6), uppercase[:5])]

    m = xterm_markup()
    print(m.color('hello world', xterm_markup.RAINBOW))

    self.game = Game()
    self.game.markup = m
    for p in players:
        self.game.add_player(p)

    for c in self.game.deck:
        c.markup = xterm_markup()

    self.game.start_game(players[0], opts={'rainbow_10': True})
    self.game.turn_order = list(players)

    # Overwrite hand slots 0..2 of the current player with rainbow cards.
    for number, letter in [(1, 'A'), (2, 'B'), (3, 'C')]:
        c = Card(text_markup_base.RAINBOW, number, letter)
        c.markup = self.game.markup
        self.game._players[self.game.player_turn()].hand[number - 1] = c

    self.game.play_card(self.game.player_turn(), 'A')
    self.game.play_card(self.game.player_turn(), 'A')
    print(self.game.play_card(self.game.player_turn(), 'B'))
def test_unsolvable_rainbow_5(self):
    """The last deck card must not be a rainbow 1-4 after start_game().

    A rainbow 1 is planted as the last card of the deck, the
    'solvable_rainbow_5' option is switched on, and the game is started
    with ``{'rainbow_5': True}``. After the start the card at the end of
    the deck must not be a rainbow card numbered 1 to 4.
    """
    game = Game()
    game.markup = xterm_markup()
    for p in players:
        game.add_player(p)

    for c in game.deck:
        c.markup = xterm_markup()

    opts = {'rainbow_5': True}
    game.options['solvable_rainbow_5'] = True

    # Plant the offending card at the very end of the deck.
    bad_card = Card('rainbow', 1)
    bad_card.markup = xterm_markup()
    game.deck[-1] = bad_card

    last_card = game.deck[-1]
    print('\nlast card before: %s' % last_card)

    game.start_game(players[0], opts)

    last_card = game.deck[-1]
    print('last card after: %s' % last_card)
    self.assertFalse(last_card.color == 'rainbow'
                     and last_card.number in [1, 2, 3, 4])
class test_hanabi(unittest2.TestCase):
    """Tests for the Hanabi ``Game`` / ``Player`` / ``Card`` classes."""

    def setUpGame(self):
        """Create ``self.game`` with all ``players`` added and start it.

        Not a unittest ``setUp`` hook: tests call it explicitly. After the
        start, ``turn_order`` is overwritten with a copy of ``players``.
        """
        self.game = Game()
        for p in players:
            self.game.add_player(p)

        self.game.start_game(players[0])
        self.game.turn_order = list(players)

    def getHand(self, h):
        """Return ``str()`` of every card in ``h``, joined by spaces."""
        return ' '.join([str(c) for c in h])

    def getBacks(self, h):
        """Return the concatenated ``back()`` strings of the cards in ``h``."""
        return ''.join([c.back() for c in h])

    def getFronts(self, h):
        """Return the concatenated ``front()`` strings of the cards in ``h``."""
        return ''.join([c.front() for c in h])

    def test_handmgt(self):
        """Swapping cards 'A' and 'E' exchanges their hand positions."""
        p = Player(players[0])
        p.hand = [Card('red', i, b)
                  for i, b in zip(range(1, 6), uppercase[:5])]
        self.assertEqual('ABCDE', self.getBacks(p.hand))
        print(p.swap_cards('A', 'E'))
        self.assertEqual('EBCDA', self.getBacks(p.hand))

    def test_play(self):
        """Smoke test: play a card, then give a hint; no assertions."""
        self.setUpGame()
        print(self.game.turn())
        print(self.game.play_card(players[0], 'A'))
        print(self.game.turn())
        print(self.game.hint_player(players[1], players[0], 'blue'))
# the fix in in, put the in/correct card at 'A' if win: g._players[p].hand[0] = Card(c, i, "A") else: g._players[p].hand[0] = Card(c, 6 - i, "A") print "%s playing card A" % p display(g.play_card(p, "A")) if g._is_game_over(): return # run through a bunch of games. for w in [True, False]: for n in range(5, 1, -1): show_game(n, w) g = Game() display(g.add_player("Olive")) display(g.get_hands("Olive")) display(g.add_player("Maisie")) display(g.add_player("Jasper")) display(g.add_player("George")) display(g.add_player("Frank")) display(g.add_player("One Too Many")) display(g.get_table()) display(g.remove_player("George")) display(g.get_table()) display(g.start_game("Olive")) display(g.remove_player("Maisie"))
if win: g._players[p].hand[0] = Card(c, i, 'A') else: g._players[p].hand[0] = Card(c, 6-i, 'A') show_hands(g) print '%s playing card A' % p display(g.play_card(p, 'A')) if g._is_game_over(): return # run through a bunch of games. for w in [True, False]: for n in range(5, 1, -1): show_game(n, w) g = Game() display(g.add_player('Olive')) display(g.get_hands('Olive')) display(g.add_player('Maisie')) display(g.add_player('Jasper')) display(g.add_player('George')) display(g.add_player('Frank')) display(g.add_player('One Too Many')) display(g.get_table()) display(g.remove_player('George')) display(g.get_table()) display(g.start_game('Olive')) display(g.remove_player('Maisie'))
class test_hanabi(unittest2.TestCase):
    """Tests for the Hanabi ``Game`` / ``Player`` / ``Card`` classes."""

    def setUpGame(self):
        """Create ``self.game`` with xterm markup, add ``players``, start it.

        Not a unittest ``setUp`` hook: tests call it explicitly. The game
        and every deck card get their own ``xterm_markup()`` instance, and
        after the start ``turn_order`` is overwritten with a copy of
        ``players``.
        """
        self.game = Game()
        self.game.markup = xterm_markup()
        for p in players:
            self.game.add_player(p)

        for c in self.game.deck:
            c.markup = xterm_markup()

        self.game.start_game(players[0])
        self.game.turn_order = list(players)

    def getHand(self, h):
        """Return ``str()`` of every card in ``h``, joined by spaces."""
        return ' '.join([str(c) for c in h])

    def getBacks(self, h):
        """Return the concatenated ``back()`` strings of the cards in ``h``."""
        return ''.join([c.back() for c in h])

    def getFronts(self, h):
        """Return the concatenated ``front()`` strings of the cards in ``h``."""
        return ''.join([c.front() for c in h])

    def test_handmgt(self):
        """Swapping cards 'A' and 'E' exchanges their hand positions."""
        p = Player(players[0])
        p.hand = [Card('red', i, b)
                  for i, b in zip(range(1, 6), uppercase[:5])]
        for c in p.hand:
            c.markup = xterm_markup()

        self.assertEqual('ABCDE', self.getBacks(p.hand))
        print(p.swap_cards('A', 'E'))
        self.assertEqual('EBCDA', self.getBacks(p.hand))

    def test_play(self):
        """Smoke test: play a card, then give a hint; no assertions."""
        self.setUpGame()
        print(self.game.turn())
        print(self.game.play_card(players[0], 'A'))
        print(self.game.turn())
        print(self.game.hint_player(players[1], players[0], 'blue'))

    def test_show_hints(self):
        """``hints()`` lists 1 private entry by default and 3 with show_all.

        Three ``hint_player`` calls alternate between the first two
        players; ``hints(p0)`` must then hold one private entry for p0 and
        ``hints(p1, show_all=True)`` three for p1.
        """
        p0, p1 = players[0], players[1]
        self.setUpGame()
        self.game.hint_player(p0, p1, 1)
        self.game.hint_player(p1, p0, 1)
        self.game.hint_player(p0, p1, 1)

        # hints() is called once for display and once for the assertion,
        # exactly as many times as before.
        print(self.game.hints(p0))
        hints = self.game.hints(p0)
        self.assertEqual(len(hints.private[p0]), 1)

        print(self.game.hints(p1, show_all=True))
        hints = self.game.hints(p1, show_all=True)
        self.assertEqual(len(hints.private[p1]), 3)

    def test_lastround(self):
        """After the deck runs out every player gets exactly one more turn."""
        # Make the deck have one card, let a player discard.
        # Then make sure each player (inc. initial one) gets one
        # more turn.
        self.setUpGame()
        self.game.deck = [Card('red', 1, 'A')]
        self.game.options['repeat_backs']['value'] = True
        for c in self.game.deck:
            c.markup = xterm_markup()

        self.game.discard_card(self.game.player_turn(), 'A')

        # One further discard per player; the game must still be running
        # before each of them.
        for _ in players:
            self.assertFalse(self.game.game_over())
            print(self.game.discard_card(self.game.player_turn(), 'A'))

        self.assertTrue(self.game.game_over())

    def test_rainbow_display(self):
        """Smoke-test display of rainbow cards with the 'rainbow_10' option.

        Places three rainbow cards (1/A, 2/B, 3/C) in the first three hand
        slots of the player whose turn it is, then plays 'A', 'A' and 'B'
        on successive turns. There are no assertions.
        """
        # NOTE(review): this player and hand are built but never used
        # again; ``p`` is rebound by the loop over ``players`` below. Kept
        # so the Player/Card constructors are still exercised.
        p = Player(players[0])
        p.hand = [Card('rainbow', i, b)
                  for i, b in zip(range(1, 6), uppercase[:5])]

        m = xterm_markup()
        print(m.color('hello world', xterm_markup.RAINBOW))

        self.game = Game()
        self.game.markup = m
        for p in players:
            self.game.add_player(p)

        for c in self.game.deck:
            c.markup = xterm_markup()

        self.game.start_game(players[0], opts={'rainbow_10': True})
        self.game.turn_order = list(players)

        # Overwrite hand slots 0..2 of the current player with rainbow cards.
        for number, letter in [(1, 'A'), (2, 'B'), (3, 'C')]:
            c = Card(text_markup_base.RAINBOW, number, letter)
            c.markup = self.game.markup
            self.game._players[self.game.player_turn()].hand[number - 1] = c

        self.game.play_card(self.game.player_turn(), 'A')
        self.game.play_card(self.game.player_turn(), 'A')
        print(self.game.play_card(self.game.player_turn(), 'B'))

    def test_hints(self):
        """Five alternating hints leave 2 private entries for p1, 3 for p2."""
        self.setUpGame()
        p1, p2 = self.game.turn_order[0], self.game.turn_order[1]
        self.game.hint_player(p1, p2, 5)
        self.game.hint_player(p2, p1, 5)
        self.game.hint_player(p1, p2, 5)
        self.game.hint_player(p2, p1, 5)
        self.game.hint_player(p1, p2, 5)

        gr = self.game.hints(p1)
        self.assertEqual(len(gr.private[p1]), 2)
        gr = self.game.hints(p2)
        self.assertEqual(len(gr.private[p2]), 3)

    def test_watch(self):
        """Watchers can be added and removed; a player cannot be a watcher."""
        self.setUpGame()
        p1 = self.game.turn_order[0]

        print(self.game.add_watcher('henry'))
        self.assertIn('henry', self.game._watchers)
        print(self.game.discard_card(p1, 'A'))

        print(self.game.remove_player('henry'))
        self.assertNotIn('henry', self.game._watchers)

        # Adding an existing player as a watcher must not register them.
        print(self.game.add_watcher(p1))
        self.assertNotIn(p1, self.game._watchers)

    def test_unsolvable_rainbow_5(self):
        """The last deck card must not be a rainbow 1-4 after start_game().

        A rainbow 1 is planted as the last card of the deck, the
        'solvable_rainbow_5' option is switched on, and the game is
        started with ``{'rainbow_5': True}``.
        """
        game = Game()
        game.markup = xterm_markup()
        for p in players:
            game.add_player(p)

        for c in game.deck:
            c.markup = xterm_markup()

        opts = {'rainbow_5': True}
        game.options['solvable_rainbow_5'] = True

        # Plant the offending card at the very end of the deck.
        bad_card = Card('rainbow', 1)
        bad_card.markup = xterm_markup()
        game.deck[-1] = bad_card

        last_card = game.deck[-1]
        print('\nlast card before: %s' % last_card)

        game.start_game(players[0], opts)

        last_card = game.deck[-1]
        print('last card after: %s' % last_card)
        self.assertFalse(last_card.color == 'rainbow'
                         and last_card.number in [1, 2, 3, 4])
def play_and_train(args, policy, optim):
    """Play games with ``policy`` and take one policy-gradient step.

    New ``Game(4)`` instances are played until the turns collected from
    games lasting at least three turns reach ``args.bs``. Each turn samples
    an action (play / discard / clue) and its arguments from slices of the
    policy output, and the summed log-probability of those samples is
    recorded. Per game, rewards are discounted with ``args.gamma``,
    normalised to zero mean and unit std, and ``-log_prob * return`` is
    added to the loss. The loss is divided by the turn count and one
    optimizer step is applied.

    Args:
        args: object read for ``bs``, ``device``, ``beta``, ``randmove``
            and ``gamma``.
        policy: callable applied to the float32 tensor built from
            ``game.encode()``; its output is indexed up to position 23
            (assumed to be a 1-D logit vector -- TODO confirm).
        optim: optimizer; ``zero_grad()`` and ``step()`` are each called
            once.

    Returns:
        list: ``game.score`` of each game that contributed to the loss.
    """
    # Boundaries of the slices taken from the policy output below.
    action_end = 3
    card_end = action_end + 5
    target_end = card_end + 5
    info_end = target_end + 10

    total_loss = 0
    turns = 0
    scores = []

    while turns < args.bs:
        game = Game(4)
        log_probs, rewards = [], []

        t = time_logging.start()
        while True:
            encoded = game.encode()
            t = time_logging.end("encode", t)
            logits = torch.tensor(
                encoded, device=args.device, dtype=torch.float32
            )
            logits = args.beta * policy(logits)
            t = time_logging.end("policy", t)

            # One-element list so the nested function can accumulate into it.
            turn_log_prob = [0]

            def sample(x, w=1):
                # With probability args.randmove draw uniformly; the
                # log-probability is still taken from the real logits.
                explore = torch.rand(()) < args.randmove
                source = torch.zeros_like(x) if explore else x
                pick = torch.distributions.Categorical(
                    logits=source
                ).sample().item()
                turn_log_prob[0] += x.log_softmax(0)[pick].mul(w)
                return pick

            action = sample(logits[:action_end])
            score_before = game.score

            if action == 0:
                position = sample(logits[action_end:card_end])
                out = game.play(position)
            elif action == 1:
                position = sample(logits[action_end:card_end])
                out = game.discard(position)
            elif action == 2:
                # The two clue arguments are each weighted by 0.5.
                target = sample(logits[card_end:target_end], 0.5)
                info = sample(logits[target_end:info_end], 0.5)
                # Values 0-4 are passed through; 5-9 become a letter.
                hint = info if info < 5 else "rgbyp"[info - 5]
                out = game.clue(target, hint)
            t = time_logging.end("decode", t)

            log_probs.append(turn_log_prob[0])

            # A non-None result ends the game with a -1 reward
            # (presumably an invalid move -- TODO confirm).
            if out is not None:
                rewards.append(-1)
                break

            if game.gameover:
                # Only a score of exactly 25 keeps the last score gain.
                if game.score == 25:
                    rewards.append(game.score - score_before)
                else:
                    rewards.append(-1)
                break

            rewards.append(game.score - score_before)

        # Games shorter than three turns are ignored entirely.
        if len(log_probs) >= 3:
            turns += len(log_probs)

            # Discounted return for every turn, computed back to front.
            running = 0
            discounted = []
            for reward in reversed(rewards):
                running = reward + args.gamma * running
                discounted.append(running)
            discounted.reverse()

            returns = torch.tensor(
                discounted, device=args.device, dtype=torch.float32
            )
            returns = (returns - returns.mean()) / (returns.std() + 1e-5)

            for log_prob, ret in zip(log_probs, returns):
                total_loss += -(log_prob * ret)

            # NOTE(review): assumed to belong to this ``if`` block (only
            # games used for training are scored) -- confirm nesting.
            scores.append(game.score)

    total_loss /= turns
    optim.zero_grad()
    total_loss.backward()
    optim.step()
    t = time_logging.end("backward & optim", t)

    return scores