def test_get_liberties(self):
    """Check the one-hot "liberties" feature planes on the simple 7x7 board."""
    state = simple_board()
    preprocessor = Preprocess(["liberties"], size=7)
    # move the plane axis last so planes can be indexed as [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))
    board_size = state.get_size()

    # todo - test liberties when > 8
    expected = np.zeros((board_size, board_size, 8))

    # plane 0 - one liberty: black piece at (4,4), whose only liberty is (4,3)
    expected[4, 4, 0] = 1

    # plane 1 - two liberties: the black group in the top left corner ...
    expected[0, 0:3, 1] = 1
    # ... and the white pieces on the left and right of the eye
    expected[3, 4, 1] = 1
    expected[5, 4, 1] = 1

    # plane 2 - three liberties: the white group in the top left corner ...
    expected[1, 0:2, 2] = 1
    # ... the white piece at (4,5) ...
    expected[4, 5, 2] = 1
    # ... and the black pieces on the sides of the eye
    expected[3, 3, 2] = 1
    expected[5, 3, 2] = 1

    # plane 3 - four liberties: the black piece at (4,2)
    expected[4, 2, 3] = 1

    # compare plane by plane so a failure names the liberty count involved
    for plane in range(8):
        plane_matches = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(
            plane_matches,
            "bad expectation: stones with %d liberties" % (plane + 1))
def validate_feature_planes(verbose, dataset, model_features):
    """Verify that dataset's features match the model's expected features.

    Args:
        verbose: when true, print a confirmation after a successful check.
        dataset: mapping-like object (e.g. an open HDF5 file). If it has a
            'features' entry, that entry is read as a comma-separated list of
            feature names; otherwise only the number of planes in
            dataset["states"] is compared.
        model_features: ordered list of feature names the model expects.

    Raises:
        ValueError: if the feature lists differ, or (old dataset format) if
            the number of feature planes differs.
    """
    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        # String scalars may be returned as bytes (h5py >= 3 does this);
        # decode so that split(",") and the comparison operate on text.
        if isinstance(dataset_features, bytes):
            dataset_features = dataset_features.decode("utf-8")
        dataset_features = dataset_features.split(",")
        # Both the names and their order must agree.
        if list(dataset_features) != list(model_features):
            raise ValueError(
                "Model JSON file expects features \n\t%s\n"
                "But dataset contains \n\t%s" %
                ("\n\t".join(model_features), "\n\t".join(dataset_features)))
        elif verbose:
            print(
                "Verified that dataset features and model features exactly match."
            )
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.get_output_dimension()
        if n_dataset_planes != n_model_planes:
            raise ValueError(
                "Model JSON file expects a total of %d planes from features \n\t%s\n"
                "But dataset contains %d planes" %
                (n_model_planes, "\n\t".join(model_features), n_dataset_planes))
        elif verbose:
            print(
                "Verified agreement of number of model and dataset feature planes, but cannot "
                "verify exact match using old dataset format.")
def test_get_sensibleness(self):
    """Check "sensibleness" on a 9x9 board with eyes and a suicide point."""
    board_text = ("x B . . W . . . .|"
                  "B B W . . W . . .|"
                  ". W B B W W . . .|"
                  ". B y B W W . . .|"
                  ". B B z B W . . .|"
                  ". . B B B W . . .|"
                  ". . . . . . . . W|"
                  ". . . . . . . . W|"
                  ". . . . . . . W s|")
    gs, moves = parseboard.parse(board_text)
    gs.set_current_player(go.BLACK)

    preprocessor = Preprocess(["sensibleness"], size=9)
    # single plane, so take it directly; no need to transpose
    feature = preprocessor.state_to_tensor(gs)[0, 0]

    board_size = gs.get_size()
    expectation = np.zeros((board_size, board_size), dtype=int)
    # start from every legal move ...
    for (row, col) in gs.get_legal_moves():
        expectation[row, col] = 1
    # ... then remove the eyes 'x', 'y' and 'z' from the 'sensible' moves
    for eye_label in ('x', 'y', 'z'):
        expectation[moves[eye_label]] = 0
    # 's' is suicide - should not be legal
    expectation[moves['s']] = 0

    self.assertTrue(np.all(expectation == feature))
def test_get_self_atari_size(self):
    """On the simple board the "self_atari_size" planes are expected to be all zero."""
    # TODO - at the moment there is no imminent self-atari for white
    state = simple_board()
    preprocessor = Preprocess(["self_atari_size"])
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    all_zero = np.zeros((state.size, state.size, 8))
    self.assertTrue(np.all(planes == all_zero))
def test_get_board(self):
    """Check the three "board" planes against hand-coded stone positions."""
    state = simple_board()
    preprocessor = Preprocess(["board"], size=7)
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))
    board_size = state.get_size()

    white_pos = np.asarray([
        [0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    black_pos = np.asarray([
        [1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    # every point that holds neither a white nor a black stone
    occupied = white_pos + black_pos
    empty_pos = np.ones((board_size, board_size)) - occupied

    # check number of planes
    self.assertEqual(planes.shape, (board_size, board_size, 3))

    # check return value against hand-coded expectation
    # (given that current_player is white)
    expected = np.dstack((white_pos, black_pos, empty_pos))
    self.assertTrue(np.all(planes == expected))
def __init__(self, feature_list, **kwargs):
    """Create a policy that preprocesses states according to feature_list.

    The keyword arguments specify the neural network and are forwarded to
    CNNPolicy.create_network(); "input_dim" is always set here from the
    preprocessor's output dimension, replacing any value passed in.
    """
    preprocessor = Preprocess(feature_list)
    self.preprocessor = preprocessor

    # the network's input size must match what the preprocessor produces
    kwargs["input_dim"] = preprocessor.output_dim
    self.model = CNNPolicy.create_network(**kwargs)

    self.forward = self._model_forward()
def test_get_legal(self):
    """The "legal" plane must be 1 exactly at the state's legal moves."""
    state = simple_board()
    preprocessor = Preprocess(["legal"], size=7)
    # single plane, so take it directly; no need to transpose
    legal_plane = preprocessor.state_to_tensor(state)[0, 0]

    board_size = state.get_size()
    expected = np.zeros((board_size, board_size))
    for (row, col) in state.get_legal_moves():
        expected[row, col] = 1

    self.assertTrue(np.all(expected == legal_plane))
def test_get_self_atari_size(self):
    """Check the one-hot "self_atari_size" planes on the self-atari board."""
    state = self_atari_board()
    preprocessor = Preprocess(["self_atari_size"], size=7)
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    board_size = state.get_size()
    expected = np.zeros((board_size, board_size, 8))
    # plane index is (size - 1):
    # self atari of size 1 at position 0,0
    expected[0, 0, 0] = 1
    # self atari of size 3 at position 3,4
    expected[3, 4, 2] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_self_atari_size_cap(self):
    """Check the one-hot "self_atari_size" planes on the capture board."""
    state = capture_board()
    preprocessor = Preprocess(["self_atari_size"], size=7)
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    board_size = state.get_size()
    expected = np.zeros((board_size, board_size, 8))
    # self atari of size 1 at the ko position and just below it
    for point in ((4, 5), (3, 6)):
        expected[point[0], point[1], 0] = 1
    # self atari of size 3 at bottom corner
    expected[6, 6, 2] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_sensibleness(self):
    """ "sensibleness" must mark the legal moves that are not white eyes."""
    # TODO - there are no legal eyes at the moment
    state = simple_board()
    preprocessor = Preprocess(["sensibleness"])
    # single plane, so take it directly; no need to transpose
    sensible_plane = preprocessor.state_to_tensor(state)[0, 0]

    expected = np.zeros((state.size, state.size))
    for (row, col) in state.get_legal_moves():
        if state.is_eye((row, col), go.WHITE):
            # filling one's own eye is legal but not sensible
            continue
        expected[row, col] = 1

    self.assertTrue(np.all(expected == sensible_plane))
def test_get_ladder_capture(self):
    """Only the point labelled 'a' should be flagged as a ladder capture."""
    board_text = (". . . . . . .|"
                  "B W a . . . .|"
                  ". B . . . . .|"
                  ". . . . . . .|"
                  ". . . . . . .|"
                  ". . . . . W .|")
    gs, moves = parseboard.parse(board_text)

    preprocessor = Preprocess(["ladder_capture"], size=7)
    # single plane, so take it directly; no need to transpose
    ladder_plane = preprocessor.state_to_tensor(gs)[0, 0]

    board_size = gs.get_size()
    expected = np.zeros((board_size, board_size))
    expected[moves['a']] = 1

    self.assertTrue(np.all(expected == ladder_plane))
def test_get_capture_size(self):
    """On the simple board every legal move must land in the zero-capture plane."""
    # TODO - at the moment there is no imminent capture
    state = simple_board()
    preprocessor = Preprocess(["capture_size"])
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    # there is no capture available; all legal moves are zero-capture
    for (row, col) in state.get_legal_moves():
        expected[row, col, 0] = 1

    # compare plane by plane so a failure names the capture size involved
    for n_captured in range(8):
        plane_matches = np.all(
            planes[:, :, n_captured] == expected[:, :, n_captured])
        self.assertTrue(
            plane_matches,
            "bad expectation: capturing %d stones" % n_captured)
def test_get_ladder_escape(self):
    """Only the point labelled 'a' should be flagged as a ladder escape for white."""
    # On this board, playing at 'a' is ladder escape because there is a breaker on the right.
    board_text = (". B B . . . .|"
                  "B W a . . . .|"
                  ". B . . . . .|"
                  ". . . . . W .|"
                  ". . . . . . .|"
                  ". . . . . . .|")
    gs, moves = parseboard.parse(board_text)

    preprocessor = Preprocess(["ladder_escape"], size=7)
    gs.set_current_player(go.WHITE)
    # single plane, so take it directly; no need to transpose
    escape_plane = preprocessor.state_to_tensor(gs)[0, 0]

    board_size = gs.get_size()
    expected = np.zeros((board_size, board_size))
    expected[moves['a']] = 1

    self.assertTrue(np.all(expected == escape_plane))
def __init__(self, feature_list, **kwargs):
    """Create a neural net object that preprocesses according to feature_list.

    The network itself is specified by the keyword arguments, which are
    passed to the subclass' create_network(); "input_dim" is always set here
    from the preprocessor's output dimension.

    Optional keyword argument:
        init_network (boolean): defaults to True. If set to False,
            self.model and self.forward are not initialized and the calling
            function should set them.
    """
    preprocessor = Preprocess(feature_list)
    self.preprocessor = preprocessor
    kwargs["input_dim"] = preprocessor.output_dim

    if not kwargs.get('init_network', True):
        # caller takes responsibility for self.model and self.forward
        return

    # type(self) is the concrete subclass, so subclasses only need to
    # override create_network()
    self.model = self.__class__.create_network(**kwargs)
    # self.forward is a lambda function wrapping a Keras function
    self.forward = self._model_forward()
def test_get_turns_since(self):
    """Check the one-hot "turns_since" planes against the move history."""
    state = simple_board()
    preprocessor = Preprocess(["turns_since"])
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    # newest move first, so a move's index is its age in turns
    newest_first = state.history[::-1]
    for row in range(state.size):
        for col in range(state.size):
            if state.board[row, col] == go.EMPTY:
                continue
            # find most recent move at row, col
            age = newest_first.index((row, col))
            # ages of 7 or more share the last plane
            expected[row, col, min(age, 7)] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_capture_size(self):
    """Check "capture_size" by actually playing every legal move on a copy."""
    state = capture_board()
    preprocessor = Preprocess(["capture_size"])
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    prisoners_before = state.num_white_prisoners
    expected = np.zeros((state.size, state.size, 8))
    # for each legal move, the growth of the white-prisoner count after
    # playing it gives the number of stones that move captures
    for (row, col) in state.get_legal_moves():
        trial = state.copy()
        trial.do_move((row, col))
        n_captured = trial.num_white_prisoners - prisoners_before
        # captures of 7 or more share the last plane
        expected[row, col, min(7, n_captured)] = 1

    # compare plane by plane so a failure names the capture size involved
    for plane in range(8):
        plane_matches = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(
            plane_matches,
            "bad expectation: capturing %d stones" % plane)
def test_get_liberties_after_cap(self):
    """A copy of test_get_liberties_after but where captures are imminent
    """
    state = capture_board()
    preprocessor = Preprocess(["liberties_after"])
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    # play every legal move on a copy and read the liberties at that point
    for (row, col) in state.get_legal_moves():
        trial = state.copy()
        trial.do_move((row, col))
        libs = trial.liberty_counts[row, col]
        # plane k holds k+1 liberties; 8 or more share the last plane
        plane = min(libs - 1, 7)
        expected[row, col, plane] = 1

    # compare plane by plane so a failure names the liberty count involved
    for plane in range(8):
        plane_matches = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(
            plane_matches,
            "bad expectation: stones with %d liberties after move" % (plane + 1))
def test_get_liberties_after(self):
    """Check the one-hot "liberties_after" planes on the simple board.

    The expectation is built by playing every legal move on a copy of the
    state and reading the liberty count at the played point.
    """
    gs = simple_board()
    pp = Preprocess(["liberties_after"])
    # move the plane axis last: [x, y, plane]
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    one_hot_liberties = np.zeros((gs.size, gs.size, 8))
    # TODO (?) hand-code?
    for (x, y) in gs.get_legal_moves():
        copy = gs.copy()
        copy.do_move((x, y))
        libs = copy.liberty_counts[x, y]
        # Plane k holds k+1 liberties and the last plane collects 8 or more.
        # The previous `libs < 7` test sent exactly 7 liberties to plane 7
        # instead of plane 6; this now matches test_get_liberties_after_cap.
        one_hot_liberties[x, y, min(libs - 1, 7)] = 1

    # compare plane by plane so a failure names the liberty count involved
    for i in range(8):
        self.assertTrue(
            np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
            "bad expectation: stones with %d liberties after move" % (i + 1))
def test_feature_concatenation(self):
    """Several features requested together must be stacked in request order."""
    state = simple_board()
    preprocessor = Preprocess(["board", "sensibleness", "capture_size"])
    # move the plane axis last: [x, y, plane]
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    n_planes = 3 + 1 + 8
    expected = np.zeros((state.size, state.size, n_planes))

    # first three planes: board
    for plane, colour in enumerate((go.WHITE, go.BLACK, go.EMPTY)):
        expected[:, :, plane] = (state.board == colour) * 1

    # 4th plane: sensibleness (as in test_get_sensibleness)
    for (row, col) in state.get_legal_moves():
        if state.is_eye((row, col), go.WHITE):
            continue
        expected[row, col, 3] = 1

    # 5th through 12th plane: capture size (all zero-capture)
    for (row, col) in state.get_legal_moves():
        expected[row, col, 4] = 1

    self.assertTrue(np.all(expected == planes))
def test_two_escapes(self):
    """Both 'a' and 'b' must be flagged as ladder escapes once white plays 'c'."""
    board_text = (". . X . . .|"
                  ". X O a . .|"
                  ". X c X . .|"
                  ". O X b . .|"
                  ". . O . . .|"
                  ". . . . . .|")
    gs, moves = parseboard.parse(board_text)

    # place a white stone at c, and reset player to white
    gs.do_move(moves['c'], color=go.WHITE)
    gs.set_current_player(go.WHITE)

    preprocessor = Preprocess(["ladder_escape"], size=6)
    # make sure white is to move when the feature is computed
    gs.set_current_player(go.WHITE)
    # single plane, so take it directly; no need to transpose
    escape_plane = preprocessor.state_to_tensor(gs)[0, 0]

    # both 'a' and 'b' should be considered escape moves for white after 'O' at c
    board_size = gs.get_size()
    expected = np.zeros((board_size, board_size))
    for label in ('a', 'b'):
        expected[moves[label]] = 1

    self.assertTrue(np.all(expected == escape_plane))
def is_ladder_capture(state, move):
    """Return whether the "ladder_capture" feature equals 1 at `move` for `state`.

    A fresh Preprocess sized to the state's board is built on every call.
    """
    board_size = state.get_size()
    preprocessor = Preprocess(["ladder_capture"], size=board_size)
    # drop the length-1 axes so the result can be indexed by the move itself
    ladder_plane = preprocessor.state_to_tensor(state).squeeze()
    return ladder_plane[move] == 1
def __init__(self, features):
    """Build the feature preprocessor and record its output dimension."""
    processor = Preprocess(features)
    self.feature_processor = processor
    self.n_features = processor.output_dim
def make_training_pairs(player, opp, features, mini_batch_size, board_size=19):
    """Make training pairs for batch of matches, utilizing player.get_moves
    (parallel form of player.get_move), which calls
    `CNNPolicy.batch_eval_state`.

    Args:
    player -- player that we're always updating
    opp -- batch opponent
    features -- game features to be one-hot encoded
    mini_batch_size -- number of games in mini-batch
    board_size -- size of the boards the games are played on (default 19)

    Return:
    X_list -- list of 1-hot board states associated with moves.
    y_list -- list of 1-hot moves associated with board states.
    winners -- list of winners associated with each game in batch
    """

    def do_move(states, states_prev, moves, X_list, y_list, player_color):
        """Play one move per unfinished game; record the pairs made by 'player'.

        states_prev must hold, for each game, the position the move was
        chosen from.
        """
        bsize_flat = bsize * bsize
        for st, st_prev, mv, X, y in zip(states, states_prev, moves, X_list,
                                         y_list):
            # NOTE(review): is_end_of_game is read as an attribute here, as in
            # the original code - confirm against this GameState version.
            if not st.is_end_of_game:
                # Only do more moves if not end of game already
                st.do_move(mv)
                # After the move it is the other side's turn, so the move
                # belonged to 'player' when the side to move is not player_color.
                if st.current_player != player_color and mv is not go.PASS_MOVE:
                    # Convert move to one-hot
                    state_1hot = preprocessor.state_to_tensor(st_prev)
                    move_1hot = np.zeros(bsize_flat)
                    move_1hot[flatten_idx(mv, bsize)] = 1
                    X.append(state_1hot)
                    y.append(move_1hot)
        return states, X_list, y_list

    # Lists of game training pairs (1-hot)
    X_list = [[] for _ in range(mini_batch_size)]
    y_list = [[] for _ in range(mini_batch_size)]
    preprocessor = Preprocess(features)
    bsize = player.policy.model.input_shape[-1]
    states = [GameState(size=board_size) for _ in range(mini_batch_size)]

    # Randomly choose who goes first (i.e. color of 'player')
    player_color = np.random.choice([go.BLACK, go.WHITE])
    player1, player2 = (player, opp) if player_color == go.BLACK else \
        (opp, player)

    while True:
        # Cache states before black's moves
        states_prev = [st.copy() for st in states]
        # Get moves (batch)
        moves_black = player1.get_moves(states)
        # Do moves (black)
        states, X_list, y_list = do_move(states, states_prev, moves_black,
                                         X_list, y_list, player_color)

        # Re-cache before white's moves. Without this, white's moves would be
        # paired with the position from before black's move, i.e. a board the
        # move was not actually chosen from.
        states_prev = [st.copy() for st in states]
        # Do moves (white)
        moves_white = player2.get_moves(states)
        states, X_list, y_list = do_move(states, states_prev, moves_white,
                                         X_list, y_list, player_color)

        # If all games have ended, we're done. Get winners.
        done = [st.is_end_of_game for st in states]
        if all(done):
            break

    winners = [st.get_winner() for st in states]

    # Concatenate tensors across turns within each game
    for i in range(mini_batch_size):
        X_list[i] = np.concatenate(X_list[i], axis=0)
        y_list[i] = np.vstack(y_list[i])
    return X_list, y_list, winners
def run_training(cmd_line_args=None):
    """Run supervised training of a policy network.

    Parses the command line, loads the model and the HDF5 training data,
    checks that their features agree, prepares the output directory
    (metadata, per-epoch weight checkpoints, shuffle indices) and fits the
    model with SGD.

    Args:
        cmd_line_args: optional list of command-line argument strings. When
            None, argparse reads the process' own command line.

    Raises:
        ValueError: if the dataset's features (or, for datasets without a
            'features' entry, its number of planes) do not match the model.
    """
    import argparse
    parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.')
    # required args
    parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())")  # noqa: E501
    parser.add_argument("train_data", help="A .h5 file of training data")
    parser.add_argument("out_directory", help="directory where metadata and weights will be saved")
    # frequently used args
    parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16)  # noqa: E501
    parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10)  # noqa: E501
    parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None)  # noqa: E501
    parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03)  # noqa: E501
    parser.add_argument("--decay", "-d", help="The rate at which learning decreases. Default: .0001", type=float, default=.0001)  # noqa: E501
    parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true")  # noqa: E501
    # slightly fancier args
    parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None)  # noqa: E501
    parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02])  # noqa: E501
    parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2')  # noqa: E501
    # TODO - an argument to specify which transformations to use, put it in metadata

    if cmd_line_args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(cmd_line_args)

    # TODO - what follows here should be refactored into a series of small functions

    # training is resumed exactly when a weights file was named
    resume = args.weights is not None

    if args.verbose:
        if resume:
            print("trying to resume from %s with weights %s" %
                  (args.out_directory, os.path.join(args.out_directory, args.weights)))
        else:
            if os.path.exists(args.out_directory):
                print("directory %s exists. any previous data will be overwritten" %
                      args.out_directory)
            else:
                print("starting fresh output directory %s" % args.out_directory)

    # load model from json spec
    policy = CNNPolicy.load_model(args.model)
    model_features = policy.preprocessor.feature_list
    model = policy.model
    if resume:
        model.load_weights(os.path.join(args.out_directory, args.weights))

    # features of training data
    dataset = h5.File(args.train_data)

    # Verify that dataset's features match the model's expected features.
    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        # String scalars may be returned as bytes (h5py >= 3 does this);
        # decode so that split(",") and the comparison operate on text.
        if isinstance(dataset_features, bytes):
            dataset_features = dataset_features.decode("utf-8")
        dataset_features = dataset_features.split(",")
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError("Model JSON file expects features \n\t%s\n"
                             "But dataset contains \n\t%s" % ("\n\t".join(model_features),
                                                              "\n\t".join(dataset_features)))
        elif args.verbose:
            print("Verified that dataset features and model features exactly match.")
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.output_dim
        if n_dataset_planes != n_model_planes:
            raise ValueError("Model JSON file expects a total of %d planes from features \n\t%s\n"
                             "But dataset contains %d planes" % (n_model_planes,
                                                                 "\n\t".join(model_features),
                                                                 n_dataset_planes))
        elif args.verbose:
            print("Verified agreement of number of model and dataset feature planes, but cannot "
                  "verify exact match using old dataset format.")

    n_total_data = len(dataset["states"])
    n_train_data = int(args.train_val_test[0] * n_total_data)
    # Need to make sure training data is divisible by minibatch size or get
    # warning mentioning accuracy from keras
    n_train_data = n_train_data - (n_train_data % args.minibatch)
    # NOTE(review): validation currently takes everything that is not training
    # data; the val/test fractions of --train-val-test are not used here.
    n_val_data = n_total_data - n_train_data
    # n_test_data = n_total_data - (n_train_data + n_val_data)

    if args.verbose:
        print("datset loaded")
        print("\t%d total samples" % n_total_data)
        print("\t%d training samples" % n_train_data)
        print("\t%d validaion samples" % n_val_data)

    # ensure output directory is available
    if not os.path.exists(args.out_directory):
        os.makedirs(args.out_directory)

    # create metadata file and the callback object that will write to it
    meta_file = os.path.join(args.out_directory, "metadata.json")
    meta_writer = MetadataWriterCallback(meta_file)
    # load prior data if it already exists
    if os.path.exists(meta_file) and resume:
        with open(meta_file, "r") as f:
            meta_writer.metadata = json.load(f)
        if args.verbose:
            print("previous metadata loaded: %d epochs. new epochs will be appended." %
                  len(meta_writer.metadata["epochs"]))
    elif args.verbose:
        print("starting with empty metadata")
    # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add
    # in anything else we like here
    #
    # TODO - model and train_data are saved in meta_file; check that they match
    # (and make args optional when restarting?)
    meta_writer.metadata["training_data"] = args.train_data
    meta_writer.metadata["model_file"] = args.model
    # Record all command line args in a list so that all args are recorded even
    # when training is stopped and resumed.
    meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", [])
    meta_writer.metadata["cmd_line_args"].append(vars(args))

    # create ModelCheckpoint to save weights every epoch
    checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5")
    checkpointer = ModelCheckpoint(checkpoint_template)

    # load precomputed random-shuffle indices or create them
    # TODO - save each train/val/test indices separately so there's no danger of
    # changing args.train_val_test when resuming
    shuffle_file = os.path.join(args.out_directory, "shuffle.npz")
    if os.path.exists(shuffle_file) and resume:
        # numpy's array format is binary, so the file must be opened in
        # binary mode (text mode fails on Python 3 and corrupts on Windows)
        with open(shuffle_file, "rb") as f:
            shuffle_indices = np.load(f)
        if args.verbose:
            print("loading previous data shuffling indices")
    else:
        # create shuffled indices
        shuffle_indices = np.random.permutation(n_total_data)
        with open(shuffle_file, "wb") as f:
            np.save(f, shuffle_indices)
        if args.verbose:
            print("created new data shuffling indices")
    # training indices are the first consecutive set of shuffled indices, val
    # next, then test gets the remainder
    train_indices = shuffle_indices[0:n_train_data]
    val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data]
    # test_indices = shuffle_indices[n_train_data + n_val_data:]

    symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")]

    # create dataset generators
    train_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"],
        dataset["actions"],
        train_indices,
        args.minibatch,
        symmetries)
    val_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"],
        dataset["actions"],
        val_indices,
        args.minibatch,
        symmetries)

    sgd = SGD(lr=args.learning_rate, decay=args.decay)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])

    # an "epoch" defaults to one pass over the training samples
    samples_per_epoch = args.epoch_length or n_train_data

    if args.verbose:
        print("STARTING TRAINING")

    model.fit_generator(
        generator=train_data_generator,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=args.epochs,
        callbacks=[checkpointer, meta_writer],
        validation_data=val_data_generator,
        nb_val_samples=n_val_data)
def generate_data(player_RL, player_SL, hdf5_file, n_training_pairs,
                  batch_size, bd_size, features, verbose, sgf_path):
    """Generate value-network training pairs and store them in an HDF5 file.

    Batches of games are played with play_batch() until at least
    n_training_pairs (state, winner) pairs have been collected. Data is
    written to a temporary ".tmp."-prefixed file next to hdf5_file, which is
    renamed to hdf5_file only after generation completes.

    Args:
        player_RL: player passed to play_batch as the RL policy player.
        player_SL: player passed to play_batch as the SL policy player.
        hdf5_file: path of the HDF5 file to create.
        n_training_pairs: minimum number of pairs to generate.
        batch_size: number of games played per batch.
        bd_size: board size used for the stored state tensors.
        features: feature list handed to Preprocess.
        verbose: when true, print progress and the random-move distribution.
        sgf_path: passed through to play_batch (directory for SGF output, or None).

    Raises:
        Exception: any error raised while saving a batch is re-raised after a
            warning has been issued; the temporary file is closed but kept.
    """
    # used features
    n_features = Preprocess(features).get_output_dimension()
    # temporary hdf5 file
    tmp_file = os.path.join(os.path.dirname(hdf5_file),
                            ".tmp." + os.path.basename(hdf5_file))
    # open hdf5 file
    h5f = h5py.File(tmp_file, 'w')
    # Close the file on every exit path; previously an error while saving a
    # batch left the handle open.
    try:
        # initialize a new hdf5 file
        h5_states, h5_winners = init_hdf5(h5f, n_features, bd_size)
        # random move distribution administration
        distribution = {key: 0 for key in range(DEAULT_RANDOM_MOVE)}

        if verbose:
            print(str(hdf5_file) + " file initialized.")
        max_value = str(n_training_pairs)

        next_idx = 0
        while True:
            # Randomly choose turn to play uniform random. Move prior will be from SL
            # policy. Moves after will be from RL policy.
            i_rand_move = np.random.choice(range(DEAULT_RANDOM_MOVE))

            # play games
            states, winners = play_batch(player_RL, player_SL, batch_size,
                                         features, i_rand_move, next_idx,
                                         sgf_path)

            if states is not None:
                try:
                    # get actual batch size in case any pair was removed
                    actual_batch_size = len(states)
                    # increment random distribution
                    distribution[i_rand_move] += actual_batch_size

                    # add states and winners to hdf5 file
                    h5_states.resize((next_idx + actual_batch_size,
                                      n_features, bd_size, bd_size))
                    h5_winners.resize((next_idx + actual_batch_size, 1))
                    h5_states[next_idx:] = states
                    h5_winners[next_idx:] = winners

                    # count saved pairs
                    next_idx += actual_batch_size
                except Exception:
                    warnings.warn(
                        "Unknown error occured during batch save to HDF5 file: {}".format(hdf5_file))  # noqa: E501
                    # bare raise keeps the original traceback
                    raise

            if verbose:
                # primitive progress indication: right-align the counter to
                # the width of the target and redraw the line in place
                current = str(next_idx).rjust(len(max_value))
                line = 'Progress: ' + current + '/' + max_value
                sys.stdout.write('\b' * len(line))
                sys.stdout.write('\r')
                sys.stdout.write(line)
                sys.stdout.flush()

            # stop data generation when at least n_trainings_pairs have been created
            if n_training_pairs <= next_idx:
                break
    finally:
        h5f.close()

    # processing complete: rename tmp_file to hdf5_file
    os.rename(tmp_file, hdf5_file)

    if verbose:
        print("Value training data succesfull created.")
        # show random move distribution
        print("\nRandom move distribution:")
        for key in range(DEAULT_RANDOM_MOVE):
            print("Random move: " + str(key) + " " + str(distribution[key]))
def play_batch(player_RL, player_SL, batch_size, features, i_rand_move,
               next_idx, sgf_path):
    """Play a batch of games in parallel and return one training pair from each game.

    As described in Silver et al, the method for generating value net training data is as follows:

    * pick a number between 1 and 450
    * use the supervised-learning policy to play a game against itself up to that number of moves.
    * now go off-policy and pick a totally random move
    * play out the rest of the game with the reinforcement-learning policy
    * save the state that occurred *right after* the random move,
    * and the end result of the game, as the training pair

    Args:
        player_RL: player whose get_moves() plays out the games after the random move.
        player_SL: player whose get_moves() plays the moves before the random move.
        batch_size: number of games started.
        features: feature list handed to Preprocess.
        i_rand_move: move number at which the random move is played.
        next_idx: first id used to number saved SGF files.
        sgf_path: directory for SGF output, or None to skip saving.

    Returns:
        (training_states, winners): the preprocessed states concatenated
        along axis 0, and an (n_games, 1) array of WIN/LOSE relative to the
        color that played the random move. Returns (None, None) when every
        game had already finished before the random move.
    """

    def do_move(states, moves):
        """Apply each move to its state unless that game is already over."""
        for st, mv in zip(states, moves):
            if not st.is_end_of_game():
                # Only do more moves if not end of game already
                st.do_move(mv)
        return states

    def do_rand_move(states):
        """Do a uniform-random move over legal moves and record info for
        training. Only gets called once per game.
        """
        # get legal moves and play one at random
        legal_moves = [st.get_legal_moves() for st in states]
        rand_moves = [lm[np.random.choice(len(lm))] for lm in legal_moves]
        states = do_move(states, rand_moves)
        # copy all states, these are the generated training data
        training_state_list = [st.copy() for st in states]  # For later 1hot preprocessing
        return training_state_list, states

    def convert(state_list, preprocessor):
        """Convert states to 1-hot and concatenate. X's are game state objects.
        """
        states = np.concatenate(
            [preprocessor.state_to_tensor(state) for state in state_list],
            axis=0)
        return states

    # Lists of game training pairs (1-hot)
    preprocessor = Preprocess(features)
    states = [GameState() for _ in range(batch_size)]

    # play player_SL moves
    for _ in range(i_rand_move - 1):
        # Get moves (batch)
        batch_moves = player_SL.get_moves(states)
        # Do moves (black)
        states = do_move(states, batch_moves)

    # remove games that are finished
    states = [state for state in states if not state.is_end_of_game()]

    # Nothing left to sample: report "no batch" instead of failing later in
    # np.concatenate on an empty list. generate_data skips a None result.
    if not states:
        return None, None

    # Make random move
    states_list, states = do_rand_move(states)

    # color is random move player color
    color = WHITE if i_rand_move % 2 == 0 else BLACK

    # play moves with player_RL till game ends
    while True:
        # Get moves (batch)
        batch_moves = player_RL.get_moves(states)
        # Do moves (black)
        states = do_move(states, batch_moves)
        # check if all games are finished
        done = [st.is_end_of_game() for st in states]
        if all(done):
            break

    if sgf_path is not None:
        # number different sgf
        sgf_id = next_idx
        for gm in states:
            # add leading '0' up to a width of 10
            file_name = str(sgf_id).zfill(10)
            # determine winner
            winner_game = 'WHITE' if gm.get_winner_color() == WHITE else 'BLACK'
            random_player = 'WHITE' if color == WHITE else 'BLACK'
            # generate file name
            file_name += '_winner_' + winner_game + '_active-player_' + \
                random_player + '_move_' + str(i_rand_move) + '.sgf'
            # save sgf
            save_gamestate_to_sgf(gm, sgf_path, file_name,
                                  result=winner_game + ' ' + str(i_rand_move))
            # increment sgf id count
            sgf_id += 1

    # Concatenate training examples
    training_states = convert(states_list, preprocessor)

    # get winners list relative to 'random move' player color (color)
    # winner BLACK & color Black -> WIN
    # winner WHITE & color WHITE -> WIN
    # winner BLACK & color WHITE -> LOSE
    # winner WHITE & color Black -> LOSE
    actual_batch_size = len(states)
    winners = np.array([
        WIN if st.get_winner_color() == color else LOSE for st in states
    ]).reshape(actual_batch_size, 1)

    return training_states, winners