Example #1
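# play_game() below leans on several names defined elsewhere in this project :
# width, height, n_colours and make_features_in_layers (see Example #3), plus
# two model-evaluation helpers. As hypothetical stand-ins, so the shapes the
# code expects are explicit (the real versions presumably evaluate the Lasagne
# model, with and without stochastic layers) :

import numpy as np

def model_evaluate_features(all_features):
  # Stand-in : the real version returns the model's Q() estimate per candidate move
  return np.zeros( (all_features.shape[0], 1), dtype='float32')

def model_evaluate_features_deterministic(all_features):
  # Stand-in : the real version is the deterministic (evaluation-mode) pass
  return np.zeros( (all_features.shape[0], 1), dtype='float32')
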
def play_game(game_id, model, per_step_discount_factor=0.95, prob_exploration=0.1):
  training_data = dict( board=[], target=[])
  
  np.random.seed(game_id)
  board = crush.new_board(width, height, n_colours) # Same as portrait phone  1 screen~1k,  high-score~14k

  score_total, new_cols_total, moves_total, game_step = 0,0,0,0
  while True: 
    moves = crush.potential_moves(board)
    moves_total += len(moves)
    
    if len(moves)==0:
      # Need to add a training example : This is a zero-score outcome
      training_data['board'].append( make_features_in_layers(board) )
      training_data['target'].append( 0. )
      
      break

    # Let's find the highest-scoring of those moves:  First, get all the features
    next_step_features = []
    next_step_target = []
    for (h,v) in moves:  # [0:2]
      b, score, n_cols = crush.after_move(board, h,v, -1)  # Added columns are unknown
      
      next_step_features.append( make_features_in_layers(b) )
      #next_step_target.append( score )
      next_step_target.append( n_cols )
      
    # Now evaluate the Q() values of the resulting position for each possible move in one go
    all_features = np.array(next_step_features)  # , dtype='float32'
    #print("all_features.shape", all_features.shape)
    
    remember_training, i = False, -1
    if prob_exploration<0:  # This is testing only - just need to pick the best move
      next_step_q = model_evaluate_features_deterministic( all_features )
    else:
      if np.random.uniform(0.0, 1.0)<prob_exploration:
        ## Choose a random move, and do it
        i = np.random.randint( len(moves) )
      else:
        next_step_q = model_evaluate_features( all_features )
        remember_training=True

    if i<0:
      # Bellman-style target : immediate reward (new columns) + discounted Q() of each successor position
      next_step_aggregate = np.array( next_step_target, dtype='float32') + per_step_discount_factor * next_step_q.flatten()
      #print( next_step_aggregate )
      i = np.argmax( next_step_aggregate )
    
    (h,v) = moves[i]
    
    #print("Move : (%2d,%2d)" % (h,v))
    #crush.show_board(board, highlight=(h,v))
    
    if remember_training:  # Only collect training data if not testing
      training_data['board'].append( make_features_in_layers(board) )
      training_data['target'].append( next_step_aggregate[i] )   # This value includes a Q() that looks at the 'blank cols', rather than the actuals
    
    board, score, new_cols = crush.after_move(board, h,v, n_colours)  # Now we do the move 'for real'
    
    score_total += score
    new_cols_total += new_cols
    
    #print("Move[%2d]=(%2d,%2d) -> Score : %3d, new_cols=%1d" % (i, h,v, score,new_cols))
    #crush.show_board(board, highlight=(0,0))

    game_step += 1
    
  # max(game_step, 1) guards against division by zero if the very first board has no moves
  stats=dict( steps=game_step, av_potential_moves=float(moves_total) / max(game_step, 1), score=score_total, new_cols=new_cols_total )
  return stats, training_data
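
# For context, play_game() is typically driven by alternating self-play with a
# training step. A minimal driver sketch : model_train_on_batch() is a
# hypothetical stand-in for whatever gradient update the full program defines,
# and model=None works here because play_game() calls the evaluation helpers
# rather than the model object directly.

def model_train_on_batch(boards, targets):  # hypothetical no-op stand-in
  pass

model = None  # stand-in : the real object is the Lasagne network built below

all_stats = []
for gid in range(10):
  stats, training_data = play_game(gid, model, prob_exploration=0.1)
  all_stats.append(stats)
  if len(training_data['board'])>0:
    boards  = np.array(training_data['board'],  dtype='float32')
    targets = np.array(training_data['target'], dtype='float32')
    model_train_on_batch(boards, targets)

print("Average score over %d games : %.1f" %
      (len(all_stats), sum(s['score'] for s in all_stats)/len(all_stats)))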
Example #3
    
def make_features_in_layers(board):
  # (Reconstructed header : the listing is truncated above this line. The shift
  #  values are an assumption -- each layer marks cells that match the cell
  #  shift_right positions further along the first axis.)
  feature_layers = []
  for shift_right in [1, 2, 3]:
    sameness = np.zeros_like(board, dtype='float32')
    sameness[:-shift_right,:] = np.equal( board[:-shift_right, :], board[shift_right:, :] )*1.
    #print(sameness)

    feature_layers.append( sameness )
  
  stacked = np.dstack( feature_layers )
  return np.rollaxis( stacked, 2, 0 )
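
# The shifted-slice comparison above is the whole trick : np.equal lines the
# board up against a copy of itself offset along the first axis, so each cell
# of 'sameness' says whether the cell one step over holds the same colour.
# A tiny standalone illustration (values chosen arbitrarily) :

import numpy as np

board_demo = np.array([[1, 2, 2],
                       [1, 2, 3],
                       [3, 3, 3]])

shift = 1
sameness_demo = np.zeros_like(board_demo, dtype='float32')
sameness_demo[:-shift, :] = np.equal(board_demo[:-shift, :], board_demo[shift:, :]) * 1.
print(sameness_demo)
# [[1. 1. 0.]
#  [0. 0. 1.]
#  [0. 0. 0.]]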


#width, height, n_colours = 10,14,5
width, height, n_colours = 5,8,4


# Create a board for initial sizing only
board_temp = crush.new_board(width, height, n_colours) # Same as portrait phone  1 screen~1k,  high-score~14k

#features_shape = make_features_variable_size(board_temp).shape
features_shape = make_features_in_layers(board_temp).shape
print( features_shape )
#exit(0)

# Now, create a simple convolutional network (MNIST-like sizing)
#    See : https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
#      Does it make sense to do dropout?  Perhaps learn over a batch a few times to 'average out' a little?
def build_cnn(input_var, features_shape):
    # Create a CNN of two convolution layers and a fully-connected hidden layer in front of the output layer
    
    lasagne.random.set_rng( np.random )  # np.random.RandomState.get_state()

    # Input layer, as usual:
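
# The listing breaks off at the input layer. As a sketch of how the body the
# comment describes might continue -- two convolutions, then a fully-connected
# hidden layer in front of a single linear Q()-value output -- using standard
# Lasagne layers. The filter counts and hidden-layer size here are assumptions,
# not the original hyperparameters.

import lasagne

def build_cnn_sketch(input_var, features_shape):
    network = lasagne.layers.InputLayer(shape=(None,) + features_shape,
                                        input_var=input_var)

    # Two small convolution layers over the stacked feature planes
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3, 3), pad='same',
        nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.Conv2DLayer(
        network, num_filters=32, filter_size=(3, 3), pad='same',
        nonlinearity=lasagne.nonlinearities.rectify)

    # Fully-connected hidden layer in front of the output layer
    network = lasagne.layers.DenseLayer(
        network, num_units=64, nonlinearity=lasagne.nonlinearities.rectify)

    # Single scalar output : the estimated Q() value of the input position
    return lasagne.layers.DenseLayer(
        network, num_units=1, nonlinearity=lasagne.nonlinearities.linear)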