def play_game(game_id, model, per_step_discount_factor=0.95, prob_exploration=0.1):
    """Play one complete game and collect Q-learning training examples.

    A new board is created after seeding ``np.random`` with ``game_id``.  On
    every step each legal move is simulated with unknown ("blank") refill
    columns, the resulting positions are evaluated in one batch, and the move
    maximising ``new_columns + per_step_discount_factor * Q(next_position)``
    is played -- unless a random exploratory move is chosen instead.

    Arguments:
      game_id  -- value passed to ``np.random.seed`` before the board is built.
      model    -- not referenced inside this function; positions are evaluated
                  through the module-level ``model_evaluate_features`` and
                  ``model_evaluate_features_deterministic`` helpers.
      per_step_discount_factor -- weight applied to the evaluated Q() value of
                  the position that follows a candidate move.
      prob_exploration -- per-step probability of playing a uniformly random
                  move.  A negative value means "testing": the deterministic
                  evaluator is used and no per-move training example is stored.

    Returns a tuple ``(stats, training_data)``:
      stats         -- dict with keys ``steps``, ``av_potential_moves``,
                       ``score`` and ``new_cols``.
      training_data -- dict with parallel lists ``board`` (feature arrays) and
                       ``target`` (values).  The terminal, no-moves position
                       is always appended with target 0., even when testing.
    """
    training_data = dict(board=[], target=[])

    np.random.seed(game_id)
    # Original note: same as portrait phone 1 screen~1k, high-score~14k
    board = crush.new_board(width, height, n_colours)

    score_total, new_cols_total, moves_total, game_step = 0, 0, 0, 0
    while True:
        moves = crush.potential_moves(board)
        moves_total += len(moves)

        if len(moves) == 0:
            # Game over : add a training example for this zero-score outcome
            training_data['board'].append(make_features_in_layers(board))
            training_data['target'].append(0.)
            break

        # Simulate every candidate move to get the features of its outcome
        next_step_features = []
        next_step_target = []
        for (h, v) in moves:
            # -1 : added columns are unknown
            b, _, n_cols = crush.after_move(board, h, v, -1)
            next_step_features.append(make_features_in_layers(b))
            # The immediate reward is the number of new columns (not the score)
            next_step_target.append(n_cols)

        # Evaluate the Q() values of the resulting position for each possible
        # move in one go
        all_features = np.array(next_step_features)

        remember_training, i = False, -1
        if prob_exploration < 0:
            # This is testing only - just need to pick the best move
            next_step_q = model_evaluate_features_deterministic(all_features)
        elif np.random.uniform(0.0, 1.0) < prob_exploration:
            # Choose a random move, and do it (nothing is stored for training)
            i = np.random.randint(len(moves))
        else:
            next_step_q = model_evaluate_features(all_features)
            remember_training = True

        if i < 0:
            # No random move was picked : take the greedy one
            next_step_aggregate = (
                np.array(next_step_target, dtype='float32')
                + per_step_discount_factor * next_step_q.flatten()
            )
            i = np.argmax(next_step_aggregate)

        (h, v) = moves[i]

        if remember_training:  # Only collect training data if not testing
            training_data['board'].append(make_features_in_layers(board))
            # This value includes a Q() that looks at the 'blank cols',
            # rather than the actuals
            training_data['target'].append(next_step_aggregate[i])

        # Now we do the move 'for real'
        board, score, new_cols = crush.after_move(board, h, v, n_colours)

        score_total += score
        new_cols_total += new_cols
        game_step += 1

    # A board with no legal move at the very start leaves game_step at 0;
    # report an average of 0.0 instead of raising ZeroDivisionError.
    if game_step > 0:
        av_potential_moves = float(moves_total) / game_step
    else:
        av_potential_moves = 0.0

    stats = dict(
        steps=game_step,
        av_potential_moves=av_potential_moves,
        score=score_total,
        new_cols=new_cols_total,
    )
    return stats, training_data
def play_game(game_id, model, per_step_discount_factor=0.95, prob_exploration=0.1):
    """Run a single game to completion, returning statistics and training data.

    ``np.random`` is seeded with ``game_id`` and a fresh board is built.  Each
    step simulates all legal moves with unknown refill columns (``-1``),
    evaluates the resulting positions in a single batch, and plays the move
    with the largest ``new_columns + per_step_discount_factor * Q(next)``
    value, or a uniformly random move when exploring.

    Arguments:
      game_id  -- seed handed to ``np.random.seed``.
      model    -- unused in this body; evaluation is done by the module-level
                  ``model_evaluate_features`` /
                  ``model_evaluate_features_deterministic`` functions.
      per_step_discount_factor -- factor multiplying the evaluated Q() value
                  of the position reached by a candidate move.
      prob_exploration -- probability, per step, of a random move.  Negative
                  values select testing mode (deterministic evaluation, no
                  per-move training examples).

    Returns ``(stats, training_data)`` where ``stats`` is a dict with keys
    ``steps``, ``av_potential_moves``, ``score``, ``new_cols`` and
    ``training_data`` is a dict of the parallel lists ``board`` and
    ``target``.  The final position without moves is always recorded with a
    target of 0.
    """
    training_data = dict(board=[], target=[])

    np.random.seed(game_id)
    # Original note: same as portrait phone 1 screen~1k, high-score~14k
    board = crush.new_board(width, height, n_colours)

    score_total, new_cols_total, moves_total, game_step = 0, 0, 0, 0
    while True:
        moves = crush.potential_moves(board)
        moves_total += len(moves)

        if len(moves) == 0:
            # Need to add a training example : This is a zero-score outcome
            training_data['board'].append(make_features_in_layers(board))
            training_data['target'].append(0.)
            break

        # First, get the features of the position after each possible move
        next_step_features = []
        next_step_target = []
        for (h, v) in moves:
            # Added columns are unknown (-1)
            b, _, n_cols = crush.after_move(board, h, v, -1)
            next_step_features.append(make_features_in_layers(b))
            # Reward used here is the count of new columns, not the score
            next_step_target.append(n_cols)

        # Now evaluate the Q() values of the resulting position for each
        # possible move in one go
        all_features = np.array(next_step_features)

        remember_training, i = False, -1
        if prob_exploration < 0:
            # Testing only - just need to pick the best move
            next_step_q = model_evaluate_features_deterministic(all_features)
        elif np.random.uniform(0.0, 1.0) < prob_exploration:
            # Exploration : choose a random move; no training example is kept
            i = np.random.randint(len(moves))
        else:
            next_step_q = model_evaluate_features(all_features)
            remember_training = True

        if i < 0:
            # Greedy choice over immediate reward plus discounted Q()
            next_step_aggregate = (
                np.array(next_step_target, dtype='float32')
                + per_step_discount_factor * next_step_q.flatten()
            )
            i = np.argmax(next_step_aggregate)

        (h, v) = moves[i]

        if remember_training:  # Only collect training data if not testing
            training_data['board'].append(make_features_in_layers(board))
            # This value includes a Q() that looks at the 'blank cols',
            # rather than the actuals
            training_data['target'].append(next_step_aggregate[i])

        # Now we do the move 'for real'
        board, score, new_cols = crush.after_move(board, h, v, n_colours)

        score_total += score
        new_cols_total += new_cols
        game_step += 1

    # If the starting board had no legal move, game_step is still 0; avoid a
    # ZeroDivisionError and report an average of 0.0 potential moves.
    if game_step > 0:
        av_potential_moves = float(moves_total) / game_step
    else:
        av_potential_moves = 0.0

    stats = dict(
        steps=game_step,
        av_potential_moves=av_potential_moves,
        score=score_total,
        new_cols=new_cols_total,
    )
    return stats, training_data
sameness[:-shift_right,:] = np.equal( board[:-shift_right, :], board[shift_right:, :] )*1. #print(sameness) feature_layers.append( sameness ) stacked = np.dstack( feature_layers ) return np.rollaxis( stacked, 2, 0 ) #width, height, n_colours = 10,14,5 width, height, n_colours = 5,8,4 # Create a board for initial sizing only board_temp = crush.new_board(width, height, n_colours) # Same as portrait phone 1 screen~1k, high-score~14k #features_shape = make_features_variable_size(board_temp).shape features_shape = make_features_in_layers(board_temp).shape print( features_shape ) #exit(0) # Now, create a simple ?fully-connected? network (MNIST-like sizing) # See : https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py # Does it make sense to do dropout? Perhaps learn over a batch a few times to 'average out' a little? def build_cnn(input_var, features_shape): # Create a CNN of two convolution layers and a fully-connected hidden layer in front of the output layer lasagne.random.set_rng( np.random ) # np.random.RandomState.get_state() # Input layer, as usual:
sameness[:-shift_right, :] = np.equal(board[:-shift_right, :], board[shift_right:, :]) * 1. #print(sameness) feature_layers.append(sameness) stacked = np.dstack(feature_layers) return np.rollaxis(stacked, 2, 0) #width, height, n_colours = 10,14,5 width, height, n_colours = 5, 8, 4 # Create a board for initial sizing only board_temp = crush.new_board( width, height, n_colours) # Same as portrait phone 1 screen~1k, high-score~14k #features_shape = make_features_variable_size(board_temp).shape features_shape = make_features_in_layers(board_temp).shape print(features_shape) #exit(0) # Now, create a simple ?fully-connected? network (MNIST-like sizing) # See : https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py # Does it make sense to do dropout? Perhaps learn over a batch a few times to 'average out' a little? def build_cnn(input_var, features_shape): # Create a CNN of two convolution layers and a fully-connected hidden layer in front of the output layer