def get_after_states(self, current_state):
    """Build every successor board reachable by placing this piece.

    Two orientations are generated in turn. The first covers three
    adjacent columns and two rows, the second covers two adjacent columns
    and three rows. For each admissible left-most column the anchor row
    is derived from ``current_state.lowest_free_rows`` and a fresh
    ``state.State`` is created from copies of the board arrays.

    :param current_state: state whose ``representation`` and
        ``lowest_free_rows`` are read; both are copied before writing.
    :return: list of ``state.State`` objects; all three-column placements
        come first, then all two-column placements, each by rising column.
    """
    heights = current_state.lowest_free_rows
    board = current_state.representation
    successors = []

    def emit(grid, free_rows, column, rows_touched, cells_per_row, bonus):
        # Single place where the successor state is assembled and stored.
        successors.append(
            state.State(representation=grid,
                        lowest_free_rows=free_rows,
                        anchor_col=column,
                        changed_lines=rows_touched,
                        pieces_per_changed_row=cells_per_row,
                        landing_height_bonus=bonus,
                        num_features=self.num_features,
                        feature_type=self.feature_type))

    # Horizontal placements: the footprint spans columns col .. col + 2.
    # NOTE(review): only the lower row is reported in changed_lines;
    # presumably the upper row can never be complete because the cell
    # above column col + 2 stays empty -- confirm against State.
    for col, _ in enumerate(heights[:self.num_columns - 2]):
        # The left column may sit one row lower than the other two because
        # the piece only occupies it in its upper row.
        base = np.maximum(heights[col] - 1,
                          np.max(heights[(col + 1):(col + 3)]))
        grid = board.copy()
        grid[base + 1, col:(col + 2)] = 1
        grid[base, (col + 1):(col + 3)] = 1
        free_rows = heights.copy()
        free_rows[col + 2] = base + 1
        free_rows[col:(col + 2)] = base + 2
        emit(grid, free_rows, col,
             np.arange(base, base + 1), np.array([2]), 0.5)

    # Vertical placements: the footprint spans columns col and col + 1.
    for col, _ in enumerate(heights[:self.num_columns - 1]):
        # The right column is occupied one row higher than the left one.
        base = np.maximum(heights[col], heights[col + 1] - 1)
        grid = board.copy()
        grid[(base + 1):(base + 3), col + 1] = 1
        grid[base:(base + 2), col] = 1
        free_rows = heights.copy()
        free_rows[col + 1] = base + 3
        free_rows[col] = base + 2
        emit(grid, free_rows, col,
             np.arange(base, base + 2), np.array([1, 2]), 1)

    return successors
def get_after_states(self, current_state):
    """Build every successor board for the four-cell straight piece.

    The piece is first placed upright (four rows of a single column, for
    every column), then lying flat (one row across four adjacent
    columns). Each placement yields a new ``state.State`` built from
    copies of the current board arrays.

    :param current_state: state whose ``representation`` and
        ``lowest_free_rows`` are read; both are copied before writing.
    :return: list of ``state.State`` objects; upright placements first,
        then flat placements, each by rising column.
    """
    heights = current_state.lowest_free_rows
    board = current_state.representation
    successors = []

    def emit(grid, free_rows, column, rows_touched, cells_per_row, bonus):
        # Single place where the successor state is assembled and stored.
        successors.append(
            state.State(representation=grid,
                        lowest_free_rows=free_rows,
                        anchor_col=column,
                        changed_lines=rows_touched,
                        pieces_per_changed_row=cells_per_row,
                        landing_height_bonus=bonus,
                        num_features=self.num_features,
                        feature_type=self.feature_type))

    # Vertical placements: the piece starts at the column's lowest free row.
    for col, base in enumerate(heights):
        grid = board.copy()
        grid[base:(base + 4), col] = 1
        free_rows = heights.copy()
        free_rows[col] += 4
        emit(grid, free_rows, col,
             np.arange(base, base + 4), np.array([1, 1, 1, 1]), 1.5)

    # Horizontal placements: the piece sits at the largest lowest-free-row
    # value among the four columns it covers.
    for col, _ in enumerate(heights[:self.num_columns - 3]):
        base = np.max(heights[col:(col + 4)])
        grid = board.copy()
        grid[base, col:(col + 4)] = 1
        free_rows = heights.copy()
        free_rows[col:(col + 4)] = base + 1
        emit(grid, free_rows, col,
             np.arange(base, base + 1), np.array([4]), 0)

    return successors
def reset(self):
    """Put the game back into its initial, empty-board condition.

    Clears the game-over flag, installs an all-zero board state, creates a
    new tetromino sampler over ``self.tetrominos`` and zeroes the
    cleared-line counter and the list of collected state samples.
    """
    self.game_over = False

    # The board has ``tetromino_size`` extra rows on top of ``num_rows``.
    board_shape = (self.num_rows + self.tetromino_size, self.num_columns)
    empty_board = np.zeros(board_shape, dtype=np.int_)
    column_heights = np.zeros(self.num_columns, dtype=np.int_)
    self.current_state = state.State(representation=empty_board,
                                     lowest_free_rows=column_heights,
                                     num_features=self.num_features,
                                     feature_type=self.feature_type)

    # NOTE(review): this installs ``TetrominoSampler``; confirm that this
    # is the sampler class intended after a reset.
    self.tetromino_sampler = tetromino.TetrominoSampler(self.tetrominos)
    self.cleared_lines = 0
    self.state_samples = []
def __init__(self, num_columns, num_rows, player, verbose,
             plot_intermediate_results=False, tetromino_size=4,
             target_update=1, max_cleared_test_lines=np.inf):
    """Set up a game on an empty board for the given player.

    :param num_columns: number of board columns.
    :param num_rows: number of board rows (the stored board gets
        ``tetromino_size`` additional rows).
    :param player: object providing ``num_features`` and ``feature_type``;
        stored as ``self.player``.
    :param verbose: stored as ``self.verbose``.
    :param plot_intermediate_results: stored unchanged on the instance.
    :param tetromino_size: number of extra rows added to the board array.
    :param target_update: stored unchanged on the instance.
    :param max_cleared_test_lines: stored unchanged on the instance.
    """
    # Plain configuration.
    self.num_columns = num_columns
    self.num_rows = num_rows
    self.tetromino_size = tetromino_size
    self.player = player
    self.verbose = verbose
    self.target_update = target_update

    # Feature settings are taken over from the player.
    self.num_features = player.num_features
    self.feature_type = player.feature_type
    self.n_fields = self.num_columns * self.num_rows

    # Initial game state: nothing placed yet.
    self.game_over = False
    board_shape = (self.num_rows + self.tetromino_size, self.num_columns)
    self.current_state = state.State(
        representation=np.zeros(board_shape, dtype=np.int_),
        lowest_free_rows=np.zeros(self.num_columns, dtype=np.int_),
        num_features=self.num_features,
        feature_type=self.feature_type)

    # One instance of every piece type, all sharing the same settings.
    piece_classes = (
        tetromino.Straight,
        tetromino.RCorner,
        tetromino.LCorner,
        tetromino.Square,
        tetromino.SnakeR,
        tetromino.SnakeL,
        tetromino.T,
    )
    self.tetrominos = [
        piece_class(self.feature_type, self.num_features, self.num_columns)
        for piece_class in piece_classes
    ]
    self.tetromino_sampler = tetromino.TetrominoSamplerRandom(
        self.tetrominos)

    # Bookkeeping.
    self.cleared_lines = 0
    self.state_samples = []
    self.cumulative_steps = 0
    self.max_cleared_test_lines = max_cleared_test_lines
    self.plot_intermediate_results = plot_intermediate_results
def reset(self):
    """Restart the game from an empty board.

    Clears the game-over flag, replaces the current state with an empty
    one, computes its BCTS features, zeroes the cleared-line counter and
    asks the generative model for its next tetromino.
    """
    self.game_over = False

    # State takes its arguments positionally; the names below mirror the
    # parameter each value is passed for.
    representation = np.zeros((self.num_rows, self.num_columns),
                              dtype=np.bool_)
    lowest_free_rows = np.zeros(self.num_columns, dtype=np.int64)
    changed_lines = np.array([0], dtype=np.int64)
    pieces_per_changed_row = np.array([0], dtype=np.int64)
    landing_height_bonus = 0.0
    feature_type = "bcts"
    terminal_state = False
    has_overlapping_fields = False

    self.current_state = state.State(
        representation,
        lowest_free_rows,
        changed_lines,
        pieces_per_changed_row,
        landing_height_bonus,
        self.num_features,
        feature_type,
        terminal_state,
        has_overlapping_fields,
    )
    self.current_state.calc_bcts_features()

    self.cleared_lines = 0
    self.generative_model.next_tetromino()
def load_rollout_state_population(p, max_samples, print_average_height=False):
    """Read rollout starting states from a text file.

    Every line of the file at ``p.rollout_population_path`` describes one
    board: each whitespace-separated integer is turned into one board row
    by taking characters 3..12 of its ``bin()`` string. Four all-zero rows
    of width ``p.num_columns`` are stacked below the parsed rows in the
    array, and the result is cast to ``np.bool_``.

    :param p: object providing ``rollout_population_path`` and
        ``num_columns``.
    :param max_samples: upper bound on the number of lines that are read.
    :param print_average_height: if true, print the mean of the per-state
        mean ``lowest_free_rows``.
    :return: list of ``state.State`` objects, one per line read.
    """
    rollout_population = []
    with open(p.rollout_population_path, "r") as ins:
        for count, line in enumerate(ins):
            if count >= max_samples:
                break
            # bin() yields "0b..."; the slice drops "0b" plus the leading
            # digit and keeps at most ten digits.
            # NOTE(review): the width 10 is hard-coded while the padding
            # below uses p.num_columns -- presumably both are 10; confirm.
            parsed_rows = np.array([
                np.array([int(digit) for digit in bin(int(token))[3:13]])
                for token in line.split()
            ])
            padding = np.zeros((4, p.num_columns))
            rep = np.vstack((parsed_rows, padding)).astype(np.bool_)
            lowest_free_rows = calc_lowest_free_rows(rep)
            # NOTE(review): num_features (8) and feature_type ("bcts") are
            # fixed here rather than taken from p.
            rollout_population.append(
                state.State(
                    rep,
                    lowest_free_rows,
                    np.array([0], dtype=np.int64),  # changed_lines
                    np.array([0], dtype=np.int64),  # pieces_per_changed_row
                    0.0,  # landing_height_bonus
                    8,  # num_features
                    "bcts",  # feature_type
                    False,  # terminal_state
                ))

    if print_average_height:
        per_state_means = [
            np.mean(sample.lowest_free_rows) for sample in rollout_population
        ]
        average_lowest_free_rows = np.mean(per_state_means)
        print("average height in rollout state population",
              average_lowest_free_rows)
    return rollout_population
def __init__(self, num_columns, num_rows, max_cleared_test_lines=10e9,
             tetromino_size=4, feature_type="bcts", num_features=8):
    """Create a game with an empty board and a tetromino generator.

    :param num_columns: number of board columns.
    :param num_rows: number of board rows.
    :param max_cleared_test_lines: stored unchanged on the instance.
    :param tetromino_size: stored unchanged on the instance.
    :param feature_type: stored on the instance and handed to the
        tetromino generator.
    :param num_features: stored on the instance and handed to both the
        initial state and the tetromino generator.
    """
    self.num_columns = num_columns
    self.num_rows = num_rows
    self.tetromino_size = tetromino_size
    self.num_features = num_features
    self.feature_type = feature_type
    self.max_cleared_test_lines = max_cleared_test_lines
    self.game_over = False

    # State takes its arguments positionally; the names below mirror the
    # parameter each value is passed for.
    representation = np.zeros((self.num_rows, self.num_columns),
                              dtype=np.bool_)
    lowest_free_rows = np.zeros(self.num_columns, dtype=np.int64)
    changed_lines = np.array([0], dtype=np.int64)
    pieces_per_changed_row = np.array([0], dtype=np.int64)
    landing_height_bonus = 0.0
    terminal_state = False
    has_overlapping_fields = False
    # NOTE(review): the state receives the literal "bcts", not
    # self.feature_type -- confirm this is intended when a caller passes
    # a different feature_type.
    self.current_state = state.State(
        representation,
        lowest_free_rows,
        changed_lines,
        pieces_per_changed_row,
        landing_height_bonus,
        self.num_features,
        "bcts",
        terminal_state,
        has_overlapping_fields,
    )

    self.generative_model = tetromino.Tetromino(
        self.feature_type, self.num_features, self.num_columns)
    self.cleared_lines = 0
def get_after_states(self, current_state):
    """Build every successor board reachable by placing this piece.

    The piece occupies three cells in a line plus one extra cell at one
    end. Four orientations are generated: two that cover three adjacent
    columns, then two that cover two adjacent columns. Each placement
    yields a new ``state.State`` built from copies of the board arrays.

    :param current_state: state whose ``representation`` and
        ``lowest_free_rows`` are read; both are copied before writing.
    :return: list of ``state.State`` objects. For every admissible column
        of the three-column footprint the two wide orientations are added
        back to back; afterwards the same is done for the two tall
        orientations.
    """
    heights = current_state.lowest_free_rows
    board = current_state.representation
    successors = []

    def emit(grid, free_rows, column, rows_touched, cells_per_row, bonus):
        # Single place where the successor state is assembled and stored.
        successors.append(
            state.State(representation=grid,
                        lowest_free_rows=free_rows,
                        anchor_col=column,
                        changed_lines=rows_touched,
                        pieces_per_changed_row=cells_per_row,
                        landing_height_bonus=bonus,
                        num_features=self.num_features,
                        feature_type=self.feature_type))

    # Horizontal placements: the footprint spans columns col .. col + 2.
    for col, _ in enumerate(heights[:self.num_columns - 2]):
        # Bottom-left corner (= 'hole' in top-right corner): three cells in
        # the anchor row, one extra cell above the left-most of them.
        base = np.max(heights[col:(col + 3)])
        grid = board.copy()
        grid[base + 1, col] = 1
        grid[base, col:(col + 3)] = 1
        free_rows = heights.copy()
        free_rows[(col + 1):(col + 3)] = base + 1
        free_rows[col] = base + 2
        emit(grid, free_rows, col,
             np.arange(base, base + 1), np.array([3]), 0.5)

        # Top-right corner: three cells in the row above the anchor row,
        # one extra cell below the right-most of them. The two left
        # columns may therefore be one row higher than the right column.
        base = np.maximum(np.max(heights[col:(col + 2)]) - 1,
                          heights[col + 2])
        grid = board.copy()
        grid[base, col + 2] = 1
        grid[base + 1, col:(col + 3)] = 1
        free_rows = heights.copy()
        free_rows[col:(col + 3)] = base + 2
        emit(grid, free_rows, col,
             np.arange(base, base + 2), np.array([1, 3]), 0.5)

    # Vertical placements. 'height' becomes 'width' :)
    for col, _ in enumerate(heights[:self.num_columns - 1]):
        # Top-left corner: three cells stacked in the left column, one
        # extra cell in the right column level with the top of the stack.
        base = np.maximum(heights[col], heights[col + 1] - 2)
        grid = board.copy()
        grid[base:(base + 3), col] = 1
        grid[base + 2, col + 1] = 1
        free_rows = heights.copy()
        free_rows[col:(col + 2)] = base + 3
        emit(grid, free_rows, col,
             np.arange(base, base + 3), np.array([1, 1, 2]), 1)

        # Bottom-right corner: three cells stacked in the right column,
        # one extra cell in the left column level with the bottom.
        base = np.max(heights[col:(col + 2)])
        grid = board.copy()
        grid[base, col] = 1
        grid[base:(base + 3), col + 1] = 1
        free_rows = heights.copy()
        free_rows[col] = base + 1
        free_rows[col + 1] = base + 3
        emit(grid, free_rows, col,
             np.arange(base, base + 1), np.array([2]), 1)

    return successors
[0, 0, 0, 1, 1, 1, 0, 1, 1, 1], [1, 1, 1, 1, 1, 0, 0, 1, 1, 1], [0, 1, 1, 1, 1, 1, 0, 1, 1, 1], [0, 1, 1, 1, 1, 1, 0, 1, 1, 1], [0, 1, 0, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 0, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 1, 0, 1, 1]]) lowest_free_rows = calc_lowest_free_rows(representation) st = state.State(representation=representation, lowest_free_rows=lowest_free_rows, changed_lines=np.array([0], dtype=np.int64), pieces_per_changed_row=np.array([0], dtype=np.int64), landing_height_bonus=0.0, num_features=8, feature_type="bcts", terminal_state=False, # this is useful to generate a "terminal state" has_overlapping_fields=False) print(print_board_to_string(st)) tet = tetromino.Tetromino(feature_type="bcts", num_features=8, num_columns=10) print(print_tetromino(tet.current_tetromino)) af_st = tet.get_after_states(st) print(print_board_to_string(af_st[0]))