def _get_environment(self):
    """Rebuild all observation-related attributes from the current game state.

    Side effects (nothing is returned):

    * ``self.community_data`` / ``self.player_data`` are recreated and filled.
    * The current player's ``equity_alive`` is set from ``self.get_equity``.
    * ``self.array_everything`` (also stored as ``self.observation``) becomes
      the flat concatenation of player, community and per-stage data.
    * ``self.info`` and ``self.observation_space`` are refreshed.
    * ``self.render()`` is invoked when ``self.render_switch`` is truthy.
    """
    if not self.done:
        self._get_legal_moves()

    self.observation = None
    self.reward = 0
    self.info = None

    # Pots and stacks are divided by this factor (100 big blinds).
    stack_unit = self.big_blind * 100

    # NOTE: attributes are assigned in a fixed order on purpose; the flattened
    # observation below is built from ``__dict__.values()``.
    self.community_data = table = CommunityData(len(self.players))
    table.community_pot = self.community_pot / stack_unit
    table.current_round_pot = self.current_round_pot / stack_unit
    table.small_blind = self.small_blind
    table.big_blind = self.big_blind
    # One-hot stage marker; stage values above 3 share the last slot.
    table.stage[np.minimum(self.stage.value, 3)] = 1  # pylint: disable=invalid-sequence-index
    currently_legal = self.legal_moves
    table.legal_moves = [move in currently_legal for move in Action]

    self.player_data = hero_data = PlayerData()
    hero_data.stack = [seat.stack / stack_unit for seat in self.players]

    # No player left to act (game over): fall back to the winner.
    if not self.current_player:
        self.current_player = self.players[self.winner_ix]
    acting = self.current_player

    hero_data.position = acting.seat
    # The trailing 1000 is presumably the number of simulation runs -- TODO confirm.
    acting.equity_alive = self.get_equity(
        set(acting.cards),
        set(self.table_cards),
        sum(self.player_cycle.alive),
        1000,
    )
    hero_data.equity_to_river_alive = acting.equity_alive

    player_vec = np.array(list(flatten(hero_data.__dict__.values())))
    community_vec = np.array(list(flatten(table.__dict__.values())))
    stage_rows = [
        list(flatten(record.__dict__.values())) for record in self.stage_data
    ]
    stage_vec = np.array(stage_rows).flatten()

    self.array_everything = np.concatenate(
        [player_vec, community_vec, stage_vec]
    ).flatten()
    self.observation = self.array_everything

    # Legal moves are refreshed again before being published in ``info``.
    self._get_legal_moves()

    self.info = {
        'player_data': self.player_data.__dict__,
        'community_data': self.community_data.__dict__,
        'stage_data': [record.__dict__ for record in self.stage_data],
        'legal_moves': self.legal_moves,
    }

    self.observation_space = self.array_everything.shape

    if self.render_switch:
        self.render()
def _get_environment(self):
    """Rebuild the observation dict and the reward from the current game state.

    Side effects (nothing is returned):

    * ``self.community_data`` / ``self.player_data`` are recreated and filled
      with the raw (un-normalised) pot and stack values.
    * The current player's ``equity_alive`` is set from ``get_equity``.
    * ``self.array_everything`` becomes the flat concatenation of player,
      community and per-stage data.
    * ``self.observation`` is a dict bundling that array with the data objects.
    * ``self.reward`` is the current player's stack plus equity times the
      community pot.
    * ``self.render()`` is invoked when ``self.render_switch`` is truthy.
    """
    if not self.done:
        self._get_legal_moves()

    self.observation = None
    self.reward = None
    self.info = None

    # NOTE: attributes are assigned in a fixed order on purpose; the flattened
    # array below is built from ``__dict__.values()``.
    self.community_data = table = CommunityData(len(self.players))
    table.community_pot = self.community_pot
    table.current_round_pot = self.current_round_pot
    table.small_blind = self.small_blind
    table.big_blind = self.big_blind
    # One-hot stage marker; stage values above 3 share the last slot.
    table.stage[np.minimum(self.stage.value, 3)] = 1

    self.player_data = hero_data = PlayerData()
    hero_data.stack = [seat.stack for seat in self.players]

    # No player left to act (game over): fall back to the winner.
    if not self.current_player:
        self.current_player = self.players[self.winner_ix]
    acting = self.current_player

    hero_data.position = acting.seat
    acting.equity_alive = get_equity(
        acting.cards,
        self.table_cards,
        sum(self.player_cycle.alive),
    )
    hero_data.equity_to_river_alive = acting.equity_alive

    player_vec = np.array(list(flatten(hero_data.__dict__.values())))
    community_vec = np.array(list(flatten(table.__dict__.values())))
    stage_rows = [
        list(flatten(record.__dict__.values())) for record in self.stage_data
    ]
    stage_vec = np.array(stage_rows).flatten()

    self.array_everything = np.concatenate(
        [player_vec, community_vec, stage_vec]
    ).flatten()

    self.observation = {
        'array_everything': self.array_everything,
        'player_data': self.player_data,
        'community_data': self.community_data,
        'stage_data': self.stage_data,
    }

    self._get_legal_moves()

    # Reward = chips held now + equity-weighted share of the community pot.
    expected_pot_share = (
        self.player_data.equity_to_river_alive * self.community_pot
    )
    self.reward = self.current_player.stack + expected_pot_share

    if self.render_switch:
        self.render()
def _get_environment(self):
    """Rebuild all observation-related attributes from the current game state.

    Side effects (nothing is returned):

    * ``self.community_data`` / ``self.player_data`` are recreated and filled,
      including dealer position, active players, minimum call, the current
      player's raw stack, a first-decision flag and the hand rank.
    * The current player's ``equity_alive`` is set from ``get_equity``.
    * ``self.array_everything`` (also stored as ``self.observation``) becomes
      the flat concatenation of player, community and per-stage data.
    * ``self.info`` and ``self.observation_space`` are refreshed.
    * ``self.render()`` is invoked when ``self.render_switch`` is truthy.
    """
    if not self.done:
        self._get_legal_moves()

    self.observation = None
    self.reward = 0
    self.info = None

    # Pots and stacks are divided by this factor (100 big blinds).
    stack_unit = self.big_blind * 100

    # NOTE: attributes are assigned in a fixed order on purpose; the flattened
    # observation below is built from ``__dict__.values()``.
    self.community_data = table = CommunityData(len(self.players))
    table.community_pot = self.community_pot / stack_unit
    table.current_round_pot = self.current_round_pot / stack_unit
    table.small_blind = self.small_blind
    table.big_blind = self.big_blind
    # One-hot stage marker; stage values above 3 share the last slot.
    table.stage[np.minimum(self.stage.value, 3)] = 1
    table.game_stage = self.stage.value
    currently_legal = self.legal_moves
    table.legal_moves = [move in currently_legal for move in Action]
    table.dealer_position = self.player_cycle.dealer_idx
    table.active_players = self.player_cycle.alive
    # NOTE(review): min_call is scaled by big_blind * 10000, whereas the pots
    # use big_blind * 100 -- confirm this difference is intended.
    table.min_call = self.min_call / (self.big_blind * 10000)

    self.player_data = hero_data = PlayerData()
    hero_data.stack = [seat.stack / stack_unit for seat in self.players]

    # No player left to act (game over): fall back to the winner.
    if not self.current_player:
        self.current_player = self.players[self.winner_ix]
    acting = self.current_player

    hero_data.position = acting.seat
    acting.equity_alive = get_equity(
        acting.cards,
        self.table_cards,
        sum(self.player_cycle.alive),
    )
    hero_data.equity_to_river_alive = acting.equity_alive
    hero_data.stack_amount = acting.stack
    # 1 when the player has not yet acted in this stage, else 0.
    if acting.last_action_in_stage == '':
        hero_data.first_decision = 1
    else:
        hero_data.first_decision = 0
    hero_data.hand_rank = self.get_rank(
        self.stage, acting.cards, self.table_cards
    )

    player_vec = np.array(list(flatten(hero_data.__dict__.values())))
    community_vec = np.array(list(flatten(table.__dict__.values())))
    stage_rows = [
        list(flatten(record.__dict__.values())) for record in self.stage_data
    ]
    stage_vec = np.array(stage_rows).flatten()

    self.array_everything = np.concatenate(
        [player_vec, community_vec, stage_vec]
    ).flatten()
    self.observation = self.array_everything

    self.info = {
        'player_data': self.player_data.__dict__,
        'community_data': self.community_data.__dict__,
        'stage_data': [record.__dict__ for record in self.stage_data],
    }

    # Legal moves are refreshed once more after the observation is assembled.
    self._get_legal_moves()

    self.observation_space = self.array_everything.shape

    if self.render_switch:
        self.render()