예제 #1
0
    def test_get_liberties(self):
        """Compare the "liberties" planes of the simple board with a hand-coded expectation.

        Plane k of the expectation marks stones whose group has k + 1 liberties.
        """
        state = simple_board()
        preprocessor = Preprocess(["liberties"], size=7)
        # move the plane axis last so entries are addressed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        # todo - test liberties when > 8

        board_size = state.get_size()
        expected = np.zeros((board_size, board_size, 8))

        # groups spanning several points:
        # the black group in the top left corner has 2 liberties ...
        expected[0, 0:3, 1] = 1
        # ... and the white group in the top left corner has 3 liberties
        expected[1, 0:2, 2] = 1

        # individually listed stones, keyed by plane index (liberties - 1)
        stones_by_plane = {
            # black piece at (4,4) has a single liberty: (4,3)
            0: [(4, 4)],
            # white pieces on the left and right of the eye have 2 liberties
            1: [(3, 4), (5, 4)],
            # white piece at (4,5) and the black pieces on the sides of the eye have 3
            2: [(4, 5), (3, 3), (5, 3)],
            # black piece at (4,2) has 4 liberties
            3: [(4, 2)],
        }
        for plane, stones in stones_by_plane.items():
            for (x, y) in stones:
                expected[x, y, plane] = 1

        # check plane by plane so that a failure names the liberty count involved
        for plane in range(8):
            plane_matches = np.all(planes[:, :, plane] == expected[:, :, plane])
            self.assertTrue(
                plane_matches,
                "bad expectation: stones with %d liberties" % (plane + 1))
예제 #2
0
def validate_feature_planes(verbose, dataset, model_features):
    """Verify that dataset's features match the model's expected features.

    Arguments:
    verbose -- when truthy, print a confirmation message after a successful check
    dataset -- mapping-like dataset (e.g. an open HDF5 file). If it has a 'features'
        entry, that entry must hold a comma-separated string of feature names
        (text or bytes). Otherwise dataset["states"].shape[1] is taken as the
        number of feature planes.
    model_features -- sequence of feature names the model expects

    Raises:
    ValueError -- if the feature names differ or, for datasets without a 'features'
        entry, if the number of planes differs.
    """

    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        # Stored strings can be read back as bytes (h5py >= 3 does this on Python 3);
        # decode so that the split and the comparison below operate on text.
        if isinstance(dataset_features, bytes):
            dataset_features = dataset_features.decode("utf-8")
        dataset_features = dataset_features.split(",")
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError(
                "Model JSON file expects features \n\t%s\n"
                "But dataset contains \n\t%s" %
                ("\n\t".join(model_features), "\n\t".join(dataset_features)))
        elif verbose:
            print(
                "Verified that dataset features and model features exactly match."
            )
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.get_output_dimension()
        if n_dataset_planes != n_model_planes:
            raise ValueError(
                "Model JSON file expects a total of %d planes from features \n\t%s\n"
                "But dataset contains %d planes" %
                (n_model_planes, "\n\t".join(model_features),
                 n_dataset_planes))
        elif verbose:
            print(
                "Verified agreement of number of model and dataset feature planes, but cannot "
                "verify exact match using old dataset format.")
예제 #3
0
    def test_get_sensibleness(self):
        """Check "sensibleness" on a 9x9 position containing eyes and a suicide point.

        Expected: every legal move is sensible except the labelled eyes
        ('x', 'y', 'z'); the suicide point 's' must not be marked either.
        """
        layout = ("x B . . W . . . .|"
                  "B B W . . W . . .|"
                  ". W B B W W . . .|"
                  ". B y B W W . . .|"
                  ". B B z B W . . .|"
                  ". . B B B W . . .|"
                  ". . . . . . . . W|"
                  ". . . . . . . . W|"
                  ". . . . . . . W s|")
        state, labelled = parseboard.parse(layout)
        state.set_current_player(go.BLACK)

        preprocessor = Preprocess(["sensibleness"], size=9)
        # a single plane is produced, so index it directly instead of transposing
        plane = preprocessor.state_to_tensor(state)[0, 0]

        expected = np.zeros((state.get_size(), state.get_size()), dtype=int)

        # start from all legal moves ...
        for (i, j) in state.get_legal_moves():
            expected[i, j] = 1

        # ... then clear the eyes: 'x', 'y', and 'z' are not 'sensible' moves
        for eye_label in ('x', 'y', 'z'):
            expected[labelled[eye_label]] = 0

        # 's' is suicide - should not be legal
        expected[labelled['s']] = 0

        self.assertTrue(np.all(expected == plane))
예제 #4
0
    def test_get_self_atari_size(self):
        """Expect all eight "self_atari_size" planes to be zero on the simple board."""
        # TODO - at the moment there is no imminent self-atari for white
        state = simple_board()
        preprocessor = Preprocess(["self_atari_size"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        all_zero = np.zeros((state.size, state.size, 8))
        self.assertTrue(np.all(planes == all_zero))
예제 #5
0
    def test_get_board(self):
        """Check the three "board" planes of the simple board against known stone positions."""
        state = simple_board()
        preprocessor = Preprocess(["board"], size=7)
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        def stone_mask(points):
            # 7x7 integer grid holding 1 at each listed (x, y) point
            grid = np.zeros((7, 7), dtype=int)
            for (x, y) in points:
                grid[x, y] = 1
            return grid

        white_pos = stone_mask([(1, 0), (1, 1), (3, 4), (4, 5), (5, 4)])
        black_pos = stone_mask([(0, 0), (0, 1), (0, 2),
                                (3, 3), (4, 2), (4, 4), (5, 3)])
        # every point that holds neither a white nor a black stone is empty
        occupied = white_pos + black_pos
        empty_pos = np.ones((state.get_size(), state.get_size())) - occupied

        # check number of planes
        expected_shape = (state.get_size(), state.get_size(), 3)
        self.assertEqual(planes.shape, expected_shape)
        # check return value against hand-coded expectation
        # (given that current_player is white)
        expected = np.dstack((white_pos, black_pos, empty_pos))
        self.assertTrue(np.all(planes == expected))
예제 #6
0
    def __init__(self, feature_list, **kwargs):
        """Build the preprocessor for feature_list and the network that consumes its output.

        The preprocessor's output_dim is passed to CNNPolicy.create_network() as
        "input_dim", replacing any value supplied by the caller; all other keyword
        arguments are forwarded unchanged (see create_network()).
        """
        preprocessor = Preprocess(feature_list)
        self.preprocessor = preprocessor
        network_args = dict(kwargs, input_dim=preprocessor.output_dim)
        self.model = CNNPolicy.create_network(**network_args)
        self.forward = self._model_forward()
예제 #7
0
    def test_get_legal(self):
        """The "legal" plane must be 1 exactly at the moves the state reports as legal."""
        state = simple_board()
        preprocessor = Preprocess(["legal"], size=7)
        # a single plane is produced, so index it directly instead of transposing
        plane = preprocessor.state_to_tensor(state)[0, 0]

        board_size = state.get_size()
        expected = np.zeros((board_size, board_size))
        for (i, j) in state.get_legal_moves():
            expected[i, j] = 1

        self.assertTrue(np.all(expected == plane))
예제 #8
0
    def test_get_self_atari_size(self):
        """Check "self_atari_size" on the self-atari board against two known self-ataris."""
        state = self_atari_board()
        preprocessor = Preprocess(["self_atari_size"], size=7)
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        board_size = state.get_size()
        expected = np.zeros((board_size, board_size, 8))
        # (x, y, plane) entries; plane k stands for a self-atari of size k + 1
        expected_entries = (
            (0, 0, 0),  # self atari of size 1 at position 0,0
            (3, 4, 2),  # self atari of size 3 at position 3,4
        )
        for entry in expected_entries:
            expected[entry] = 1

        self.assertTrue(np.all(planes == expected))
예제 #9
0
    def test_get_self_atari_size_cap(self):
        """Check "self_atari_size" on the capture board against known self-ataris."""
        state = capture_board()
        preprocessor = Preprocess(["self_atari_size"], size=7)
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        board_size = state.get_size()
        expected = np.zeros((board_size, board_size, 8))
        # (x, y, plane) entries; plane k stands for a self-atari of size k + 1
        expected_entries = (
            # self atari of size 1 at the ko position and just below it
            (4, 5, 0),
            (3, 6, 0),
            # self atari of size 3 at bottom corner
            (6, 6, 2),
        )
        for entry in expected_entries:
            expected[entry] = 1

        self.assertTrue(np.all(planes == expected))
    def test_get_sensibleness(self):
        """Expect "sensibleness" to mark every legal move that is not a white eye."""
        # TODO - there are no legal eyes at the moment

        state = simple_board()
        preprocessor = Preprocess(["sensibleness"])
        # a single plane is produced, so index it directly instead of transposing
        plane = preprocessor.state_to_tensor(state)[0, 0]

        expected = np.zeros((state.size, state.size))
        for (x, y) in state.get_legal_moves():
            if state.is_eye((x, y), go.WHITE):
                # eyes stay at zero
                continue
            expected[x, y] = 1

        self.assertTrue(np.all(expected == plane))
예제 #11
0
    def test_get_ladder_capture(self):
        """Expect "ladder_capture" to be set only at the labelled point 'a'."""
        layout = (". . . . . . .|"
                  "B W a . . . .|"
                  ". B . . . . .|"
                  ". . . . . . .|"
                  ". . . . . . .|"
                  ". . . . . W .|")
        state, labelled = parseboard.parse(layout)
        preprocessor = Preprocess(["ladder_capture"], size=7)
        # a single plane is produced, so index it directly instead of transposing
        plane = preprocessor.state_to_tensor(state)[0, 0]

        board_size = state.get_size()
        expected = np.zeros((board_size, board_size))
        expected[labelled['a']] = 1

        self.assertTrue(np.all(expected == plane))
예제 #12
0
    def test_get_capture_size(self):
        """Expect every legal move on the simple board to fall in the zero-capture plane."""
        # TODO - at the moment there is no imminent capture
        state = simple_board()
        preprocessor = Preprocess(["capture_size"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))
        # there is no capture available; all legal moves are zero-capture
        for (x, y) in state.get_legal_moves():
            expected[x, y, 0] = 1

        # check plane by plane so that a failure names the capture size involved
        for n_captured in range(8):
            plane_matches = np.all(
                planes[:, :, n_captured] == expected[:, :, n_captured])
            self.assertTrue(
                plane_matches,
                "bad expectation: capturing %d stones" % n_captured)
예제 #13
0
    def test_get_ladder_escape(self):
        """Expect "ladder_escape" to be set only at 'a' when white is to move."""
        # On this board, playing at 'a' is ladder escape because there is a breaker on the right.
        layout = (". B B . . . .|"
                  "B W a . . . .|"
                  ". B . . . . .|"
                  ". . . . . W .|"
                  ". . . . . . .|"
                  ". . . . . . .|")
        state, labelled = parseboard.parse(layout)
        preprocessor = Preprocess(["ladder_escape"], size=7)
        state.set_current_player(go.WHITE)
        # a single plane is produced, so index it directly instead of transposing
        plane = preprocessor.state_to_tensor(state)[0, 0]

        board_size = state.get_size()
        expected = np.zeros((board_size, board_size))
        expected[labelled['a']] = 1

        self.assertTrue(np.all(expected == plane))
예제 #14
0
    def __init__(self, feature_list, **kwargs):
        """Set up the preprocessor and, unless disabled, the network built on top of it.

        The preprocessor's output_dim is stored in kwargs as "input_dim" and kwargs
        is then forwarded to the subclass' create_network().

        optional argument: init_network (boolean). If set to False, self.model and
        self.forward are not initialized here; the caller is expected to set them.
        """
        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim

        build_network = kwargs.get('init_network', True)
        if not build_network:
            return

        # look create_network() up on self.__class__ so that subclasses only need
        # to override create_network()
        network_factory = self.__class__.create_network
        self.model = network_factory(**kwargs)
        # self.forward is a lambda function wrapping a Keras function
        self.forward = self._model_forward()
    def test_get_turns_since(self):
        """Check "turns_since" against ages derived from the move history.

        For each occupied point, the age is the index of that point in the reversed
        history; ages of 7 or more share the last plane.
        """
        state = simple_board()
        preprocessor = Preprocess(["turns_since"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))

        # most recent move first
        newest_first = state.history[::-1]

        for x in range(state.size):
            for y in range(state.size):
                if state.board[x, y] == go.EMPTY:
                    continue
                # find most recent move at x, y
                turns_ago = newest_first.index((x, y))
                expected[x, y, min(turns_ago, 7)] = 1

        self.assertTrue(np.all(planes == expected))
    def test_get_capture_size(self):
        """Check "capture_size" on the capture board against simulated captures.

        Each legal move is played on a copy of the state; the increase in
        num_white_prisoners selects the expected plane, capped at plane 7.
        """
        state = capture_board()
        preprocessor = Preprocess(["capture_size"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        prisoners_before = state.num_white_prisoners
        expected = np.zeros((state.size, state.size, 8))
        for (x, y) in state.get_legal_moves():
            # play the move on a scratch copy and count the newly taken prisoners
            trial_state = state.copy()
            trial_state.do_move((x, y))
            newly_captured = trial_state.num_white_prisoners - prisoners_before
            expected[x, y, min(7, newly_captured)] = 1

        # check plane by plane so that a failure names the capture size involved
        for n_captured in range(8):
            plane_matches = np.all(
                planes[:, :, n_captured] == expected[:, :, n_captured])
            self.assertTrue(
                plane_matches,
                "bad expectation: capturing %d stones" % n_captured)
    def test_get_liberties_after_cap(self):
        """A copy of test_get_liberties_after but where captures are imminent

        Each legal move is played on a copy of the state; n liberties at the played
        point select plane n - 1, capped at plane 7.
        """
        state = capture_board()
        preprocessor = Preprocess(["liberties_after"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))

        for (x, y) in state.get_legal_moves():
            # play the move on a scratch copy and read the resulting liberty count
            trial_state = state.copy()
            trial_state.do_move((x, y))
            liberties = trial_state.liberty_counts[x, y]
            plane = min(liberties - 1, 7)
            expected[x, y, plane] = 1

        # check plane by plane so that a failure names the liberty count involved
        for plane in range(8):
            plane_matches = np.all(planes[:, :, plane] == expected[:, :, plane])
            self.assertTrue(
                plane_matches,
                "bad expectation: stones with %d liberties after move" %
                (plane + 1))
    def test_get_liberties_after(self):
        """Check "liberties_after" on the simple board against simulated moves.

        Each legal move is played on a copy of the state and the liberty count at the
        played point is one-hot encoded: n liberties select plane n - 1, and 8 or
        more liberties share the last plane (index 7). This is the same encoding
        that test_get_liberties_after_cap expects.
        """
        gs = simple_board()
        pp = Preprocess(["liberties_after"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

        one_hot_liberties = np.zeros((gs.size, gs.size, 8))

        # TODO (?) hand-code?
        for (x, y) in gs.get_legal_moves():
            after_move = gs.copy()
            after_move.do_move((x, y))
            libs = after_move.liberty_counts[x, y]
            # Cap at the last plane. The previous `libs < 7` test sent exactly
            # 7 liberties to plane 7 and never used plane 6.
            one_hot_liberties[x, y, min(libs - 1, 7)] = 1

        # check plane by plane so that a failure names the liberty count involved
        for i in range(8):
            self.assertTrue(
                np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
                "bad expectation: stones with %d liberties after move" %
                (i + 1))
    def test_feature_concatenation(self):
        """Check that several features are stacked in the order they were requested.

        Expected layout: 3 "board" planes, 1 "sensibleness" plane, 8 "capture_size" planes.
        """
        state = simple_board()
        preprocessor = Preprocess(["board", "sensibleness", "capture_size"])
        # move the plane axis last so the result is indexed as [x, y, plane]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 3 + 1 + 8))

        # first three planes: board
        for plane, stone in enumerate((go.WHITE, go.BLACK, go.EMPTY)):
            expected[:, :, plane] = (state.board == stone) * 1

        # 4th plane: sensibleness (as in test_get_sensibleness)
        sensibleness_plane = 3
        for (x, y) in state.get_legal_moves():
            if state.is_eye((x, y), go.WHITE):
                continue
            expected[x, y, sensibleness_plane] = 1

        # 5th through 12th plane: capture size (all zero-capture)
        zero_capture_plane = 4
        for (x, y) in state.get_legal_moves():
            expected[x, y, zero_capture_plane] = 1

        self.assertTrue(np.all(expected == planes))
예제 #20
0
    def test_two_escapes(self):
        """Expect both 'a' and 'b' to be ladder escapes for white after a white stone at 'c'."""
        layout = (". . X . . .|"
                  ". X O a . .|"
                  ". X c X . .|"
                  ". O X b . .|"
                  ". . O . . .|"
                  ". . . . . .|")
        state, labelled = parseboard.parse(layout)

        # place a white stone at c, and reset player to white
        state.do_move(labelled['c'], color=go.WHITE)
        state.set_current_player(go.WHITE)

        preprocessor = Preprocess(["ladder_escape"], size=6)
        # NOTE(review): second identical call kept from the original; it looks redundant
        state.set_current_player(go.WHITE)
        # a single plane is produced, so index it directly instead of transposing
        plane = preprocessor.state_to_tensor(state)[0, 0]

        # both 'a' and 'b' should be considered escape moves for white after 'O' at c
        board_size = state.get_size()
        expected = np.zeros((board_size, board_size))
        for escape_label in ('a', 'b'):
            expected[labelled[escape_label]] = 1

        self.assertTrue(np.all(expected == plane))
예제 #21
0
def is_ladder_capture(state, move):
    """Return whether the "ladder_capture" feature of state equals 1 at move.

    A fresh Preprocess sized to the state's board is built on every call.
    """
    board_size = state.get_size()
    preprocessor = Preprocess(["ladder_capture"], size=board_size)
    # squeeze() drops the length-one axes of the tensor before indexing with move
    plane = preprocessor.state_to_tensor(state).squeeze()
    return plane[move] == 1
예제 #22
0
	def __init__(self, features):
		"""Create the feature preprocessor and record its output_dim as n_features."""
		processor = Preprocess(features)
		self.feature_processor = processor
		self.n_features = processor.output_dim
예제 #23
0
def make_training_pairs(player, opp, features, mini_batch_size, board_size=19):
    """Make training pairs for a batch of matches between player and opp.

    Uses player.get_moves (parallel form of player.get_move), which calls
    `CNNPolicy.batch_eval_state`. Only the moves made by `player` are recorded;
    each is paired with the position it was played from.

    Args:
    player -- player that we're always updating
    opp -- batch opponent
    features -- game features to be one-hot encoded
    mini_batch_size -- number of games in mini-batch
    board_size -- size passed to each new GameState (default 19)

    Return:
    X_list -- list of 1-hot board states associated with moves.
    y_list -- list of 1-hot moves associated with board states.
    winners -- list of winners associated with each game in batch
    """
    def do_move(states, states_prev, moves, X_list, y_list, player_color):
        # Apply one move per unfinished game; record (previous state, move) pairs
        # for the moves made by player_color.
        bsize_flat = bsize * bsize
        for st, st_prev, mv, X, y in zip(states, states_prev, moves, X_list,
                                         y_list):
            if not st.is_end_of_game:
                # Only do more moves if not end of game already
                st.do_move(mv)
                # After the move it is the other side's turn, so this condition
                # holds exactly when player_color just moved.
                if st.current_player != player_color and mv is not go.PASS_MOVE:
                    # Convert move to one-hot
                    state_1hot = preprocessor.state_to_tensor(st_prev)
                    move_1hot = np.zeros(bsize_flat)
                    move_1hot[flatten_idx(mv, bsize)] = 1
                    X.append(state_1hot)
                    y.append(move_1hot)
        return states, X_list, y_list

    # Lists of game training pairs (1-hot)
    X_list = [[] for _ in range(mini_batch_size)]
    y_list = [[] for _ in range(mini_batch_size)]
    preprocessor = Preprocess(features)
    bsize = player.policy.model.input_shape[-1]
    states = [GameState(size=board_size) for _ in range(mini_batch_size)]
    # Randomly choose who goes first (i.e. color of 'player')
    player_color = np.random.choice([go.BLACK, go.WHITE])
    if player_color == go.BLACK:
        player1, player2 = player, opp
    else:
        player1, player2 = opp, player
    while True:
        # Cache states before black's moves
        states_prev = [st.copy() for st in states]
        # Get moves (batch)
        moves_black = player1.get_moves(states)
        # Do moves (black)
        states, X_list, y_list = do_move(states, states_prev, moves_black,
                                         X_list, y_list, player_color)
        # Re-cache before white's moves. Reusing the earlier snapshot would pair
        # white's moves with positions from before black's reply.
        states_prev = [st.copy() for st in states]
        # Do moves (white)
        moves_white = player2.get_moves(states)
        states, X_list, y_list = do_move(states, states_prev, moves_white,
                                         X_list, y_list, player_color)
        # If all games have ended, we're done. Get winners.
        done = [st.is_end_of_game for st in states]
        if all(done):
            break
    winners = [st.get_winner() for st in states]
    # Concatenate tensors across turns within each game
    for i in range(mini_batch_size):
        X_list[i] = np.concatenate(X_list[i], axis=0)
        y_list[i] = np.vstack(y_list[i])
    return X_list, y_list, winners
def run_training(cmd_line_args=None):
    """Run supervised training of a policy network.

    Parses the arguments, loads the model and the HDF5 dataset, checks that their
    features agree, prepares (or reloads) the shuffled train/validation split, and
    calls model.fit_generator with checkpoint and metadata callbacks.

    cmd_line_args -- optional list of argument strings; when None, argparse reads
        the process' own command line.

    Raises ValueError if the dataset's features (or, for datasets without a
    'features' entry, its number of planes) do not match the model.
    """
    import argparse
    parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.')
    # required args
    parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())")  # noqa: E501
    parser.add_argument("train_data", help="A .h5 file of training data")
    parser.add_argument("out_directory", help="directory where metadata and weights will be saved")
    # frequently used args
    parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16)  # noqa: E501
    parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10)  # noqa: E501
    parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None)  # noqa: E501
    parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03)  # noqa: E501
    parser.add_argument("--decay", "-d", help="The rate at which learning decreases. Default: .0001", type=float, default=.0001)  # noqa: E501
    parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true")  # noqa: E501
    # slightly fancier args
    parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None)  # noqa: E501
    parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02])  # noqa: E501
    parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2')  # noqa: E501
    # TODO - an argument to specify which transformations to use, put it in metadata

    if cmd_line_args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(cmd_line_args)

    # TODO - what follows here should be refactored into a series of small functions

    # training is resumed whenever a weights file is named
    resume = args.weights is not None

    if args.verbose:
        if resume:
            print("trying to resume from %s with weights %s" %
                  (args.out_directory, os.path.join(args.out_directory, args.weights)))
        else:
            if os.path.exists(args.out_directory):
                print("directory %s exists. any previous data will be overwritten" %
                      args.out_directory)
            else:
                print("starting fresh output directory %s" % args.out_directory)

    # load model from json spec
    policy = CNNPolicy.load_model(args.model)
    model_features = policy.preprocessor.feature_list
    model = policy.model
    if resume:
        model.load_weights(os.path.join(args.out_directory, args.weights))

    # features of training data; the dataset is only read here, so open it
    # read-only instead of relying on the library's default mode
    dataset = h5.File(args.train_data, "r")

    # Verify that dataset's features match the model's expected features.
    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        # Stored strings can be read back as bytes (h5py >= 3 does this on Python 3);
        # decode so that the split and the comparison below operate on text.
        if isinstance(dataset_features, bytes):
            dataset_features = dataset_features.decode("utf-8")
        dataset_features = dataset_features.split(",")
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError("Model JSON file expects features \n\t%s\n"
                             "But dataset contains \n\t%s" % ("\n\t".join(model_features),
                                                              "\n\t".join(dataset_features)))
        elif args.verbose:
            print("Verified that dataset features and model features exactly match.")
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.output_dim
        if n_dataset_planes != n_model_planes:
            raise ValueError("Model JSON file expects a total of %d planes from features \n\t%s\n"
                             "But dataset contains %d planes" % (n_model_planes,
                                                                 "\n\t".join(model_features),
                                                                 n_dataset_planes))
        elif args.verbose:
            print("Verified agreement of number of model and dataset feature planes, but cannot "
                  "verify exact match using old dataset format.")

    n_total_data = len(dataset["states"])
    n_train_data = int(args.train_val_test[0] * n_total_data)
    # Need to make sure training data is divisible by minibatch size or get
    # warning mentioning accuracy from keras
    n_train_data = n_train_data - (n_train_data % args.minibatch)
    # NOTE: everything that is not training data is currently used for validation;
    # the 'test' fraction of --train-val-test is not split off.
    n_val_data = n_total_data - n_train_data
    # n_test_data = n_total_data - (n_train_data + n_val_data)

    if args.verbose:
        print("datset loaded")
        print("\t%d total samples" % n_total_data)
        print("\t%d training samples" % n_train_data)
        print("\t%d validaion samples" % n_val_data)

    # ensure output directory is available
    if not os.path.exists(args.out_directory):
        os.makedirs(args.out_directory)

    # create metadata file and the callback object that will write to it
    meta_file = os.path.join(args.out_directory, "metadata.json")
    meta_writer = MetadataWriterCallback(meta_file)
    # load prior data if it already exists
    if os.path.exists(meta_file) and resume:
        with open(meta_file, "r") as f:
            meta_writer.metadata = json.load(f)
        if args.verbose:
            print("previous metadata loaded: %d epochs. new epochs will be appended." %
                  len(meta_writer.metadata["epochs"]))
    elif args.verbose:
        print("starting with empty metadata")
    # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add
    # in anything else we like here
    #
    # TODO - model and train_data are saved in meta_file; check that they match
    # (and make args optional when restarting?)
    meta_writer.metadata["training_data"] = args.train_data
    meta_writer.metadata["model_file"] = args.model
    # Record all command line args in a list so that all args are recorded even
    # when training is stopped and resumed.
    meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", [])
    meta_writer.metadata["cmd_line_args"].append(vars(args))

    # create ModelCheckpoint to save weights every epoch
    checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5")
    checkpointer = ModelCheckpoint(checkpoint_template)

    # load precomputed random-shuffle indices or create them
    # TODO - save each train/val/test indices separately so there's no danger of
    # changing args.train_val_test when resuming
    shuffle_file = os.path.join(args.out_directory, "shuffle.npz")
    if os.path.exists(shuffle_file) and resume:
        # numpy's array format is binary, so the file must be opened in binary mode
        with open(shuffle_file, "rb") as f:
            shuffle_indices = np.load(f)
        if args.verbose:
            print("loading previous data shuffling indices")
    else:
        # create shuffled indices
        shuffle_indices = np.random.permutation(n_total_data)
        with open(shuffle_file, "wb") as f:
            np.save(f, shuffle_indices)
        if args.verbose:
            print("created new data shuffling indices")
    # training indices are the first consecutive set of shuffled indices, val
    # next, then test gets the remainder
    train_indices = shuffle_indices[0:n_train_data]
    val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data]
    # test_indices = shuffle_indices[n_train_data + n_val_data:]

    symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")]

    # create dataset generators
    train_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"],
        dataset["actions"],
        train_indices,
        args.minibatch,
        symmetries)
    val_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"],
        dataset["actions"],
        val_indices,
        args.minibatch,
        symmetries)

    sgd = SGD(lr=args.learning_rate, decay=args.decay)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])

    # without --epoch-length, one epoch covers the whole training set
    samples_per_epoch = args.epoch_length or n_train_data

    if args.verbose:
        print("STARTING TRAINING")

    model.fit_generator(
        generator=train_data_generator,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=args.epochs,
        callbacks=[checkpointer, meta_writer],
        validation_data=val_data_generator,
        nb_val_samples=n_val_data)
def generate_data(player_RL, player_SL, hdf5_file, n_training_pairs,
                  batch_size, bd_size, features, verbose, sgf_path):
    """Generate value-network training pairs and store them in hdf5_file.

    Batches of games are played with play_batch() until at least n_training_pairs
    pairs have been saved. Data is written to a temporary file next to hdf5_file,
    which is renamed to hdf5_file only after generation completes.

    Arguments:
    player_RL, player_SL -- players handed to play_batch()
    hdf5_file -- path of the resulting HDF5 file
    n_training_pairs -- minimum number of (state, winner) pairs to generate
    batch_size -- number of games started per batch
    bd_size -- board size used for the stored state tensors
    features -- feature list used for preprocessing
    verbose -- when truthy, print progress and the random-move distribution
    sgf_path -- forwarded to play_batch()

    Any exception raised while saving a batch is re-raised after a warning.
    """
    # used features
    n_features = Preprocess(features).get_output_dimension()
    # temporary hdf5 file
    tmp_file = os.path.join(os.path.dirname(hdf5_file),
                            ".tmp." + os.path.basename(hdf5_file))

    # random move distribution administration
    distribution = {key: 0 for key in range(DEAULT_RANDOM_MOVE)}

    # The context manager closes the temporary file even when a batch fails, so
    # no open handle is leaked; the rename below only happens on success.
    with h5py.File(tmp_file, 'w') as h5f:
        # initialize a new hdf5 file
        h5_states, h5_winners = init_hdf5(h5f, n_features, bd_size)

        if verbose:
            print(str(hdf5_file) + " file initialized.")
            max_value = str(n_training_pairs)

        next_idx = 0
        while True:
            # Randomly choose turn to play uniform random. Move prior will be from SL
            # policy. Moves after will be from RL policy.
            i_rand_move = np.random.choice(range(DEAULT_RANDOM_MOVE))

            # play games
            states, winners = play_batch(player_RL, player_SL, batch_size,
                                         features, i_rand_move, next_idx, sgf_path)

            if states is not None:
                try:
                    # get actual batch size in case any pair was removed
                    actual_batch_size = len(states)
                    # increment random distribution
                    distribution[i_rand_move] += actual_batch_size

                    # add states and winners to hdf5 file
                    h5_states.resize((next_idx + actual_batch_size, n_features,
                                      bd_size, bd_size))
                    h5_winners.resize((next_idx + actual_batch_size, 1))
                    h5_states[next_idx:] = states
                    h5_winners[next_idx:] = winners

                    # count saved pairs
                    next_idx += actual_batch_size
                except Exception:
                    warnings.warn(
                        "Unknown error occured during batch save to HDF5 file: {}".
                        format(hdf5_file))  # noqa: E501
                    # bare raise keeps the original traceback
                    raise

            if verbose:
                # primitive progress indication: right-align the counter to the
                # width of the target so the line keeps a constant length
                current = str(next_idx).rjust(len(max_value))

                line = 'Progress: ' + current + '/' + max_value

                sys.stdout.write('\b' * len(line))
                sys.stdout.write('\r')
                sys.stdout.write(line)
                sys.stdout.flush()

            # stop data generation when at least n_trainings_pairs have been created
            if n_training_pairs <= next_idx:
                break

    # processing complete: rename tmp_file to hdf5_file
    os.rename(tmp_file, hdf5_file)
    if verbose:
        print("Value training data succesfull created.")

        # show random move distribution
        print("\nRandom move distribution:")
        for key in range(DEAULT_RANDOM_MOVE):
            print("Random move: " + str(key) + " " + str(distribution[key]))
def play_batch(player_RL, player_SL, batch_size, features, i_rand_move,
               next_idx, sgf_path):
    """Play a batch of games in parallel and return one training pair from each game.

    As described in Silver et al, the method for generating value net training data is as follows:

    * pick a number between 1 and 450
    * use the supervised-learning policy to play a game against itself up to that number of moves.
    * now go off-policy and pick a totally random move
    * play out the rest of the game with the reinforcement-learning policy
    * save the state that occurred *right after* the random move,
    * and the end result of the game, as the training pair

    Arguments:
    player_RL, player_SL -- players providing get_moves(states)
    batch_size -- number of games started
    features -- feature list used to preprocess the saved states
    i_rand_move -- move number at which the random move is played; 0 is treated
        like 1 (the random move is the first move of the game)
    next_idx -- first id used when numbering saved sgf files
    sgf_path -- directory for sgf output, or None to skip saving

    Returns (training_states, winners), where winners is an array of shape
    (n_games, 1) relative to the player who made the random move. Returns
    (None, None) when every game ended before the random move.
    """
    def do_move(states, moves):
        # apply one move per game, skipping games that are already over
        for st, mv in zip(states, moves):
            if not st.is_end_of_game():
                # Only do more moves if not end of game already
                st.do_move(mv)
        return states

    def do_rand_move(states):
        """Do a uniform-random move over legal moves and record info for
           training. Only gets called once per game.
        """

        # get legal moves and play one at random
        legal_moves = [st.get_legal_moves() for st in states]
        rand_moves = [lm[np.random.choice(len(lm))] for lm in legal_moves]
        states = do_move(states, rand_moves)

        # copy all states, these are the generated training data
        # (kept for later 1hot preprocessing)
        training_state_list = [st.copy() for st in states]
        return training_state_list, states

    def convert(state_list, preprocessor):
        """Convert states to 1-hot and concatenate. X's are game state objects.
        """

        states = np.concatenate(
            [preprocessor.state_to_tensor(state) for state in state_list],
            axis=0)
        return states

    # Lists of game training pairs (1-hot)
    preprocessor = Preprocess(features)
    states = [GameState() for _ in range(batch_size)]

    # number of moves the SL policy plays before the random move
    n_sl_moves = max(i_rand_move - 1, 0)

    # play player_SL moves
    for _ in range(n_sl_moves):
        # Get moves (batch)
        batch_moves = player_SL.get_moves(states)
        # Do moves
        states = do_move(states, batch_moves)

    # remove games that are finished
    states = [state for state in states if not state.is_end_of_game()]

    if not states:
        # Nothing left to sample from. Signal "no data" instead of failing later
        # when concatenating an empty list of tensors.
        return None, None

    # Make random move
    states_list, states = do_rand_move(states)

    # color is random move player color: black moves first, so the random move is
    # white's exactly when an odd number of moves was played before it. (Derived
    # from n_sl_moves so that i_rand_move == 0 is labelled BLACK like i_rand_move == 1.)
    color = WHITE if n_sl_moves % 2 == 1 else BLACK

    # play moves with player_RL till game ends
    while True:
        # Get moves (batch)
        batch_moves = player_RL.get_moves(states)
        # Do moves
        states = do_move(states, batch_moves)

        # check if all games are finished
        done = [st.is_end_of_game() for st in states]

        if all(done):
            break

    if sgf_path is not None:
        # number different sgf
        sgf_id = next_idx

        for gm in states:
            # add leading '0'
            file_name = str(sgf_id).zfill(10)

            # determine winner
            winner_game = 'WHITE' if gm.get_winner_color() == WHITE else 'BLACK'
            random_player = 'WHITE' if color == WHITE else 'BLACK'

            # generate file name
            file_name += '_winner_' + winner_game + '_active-player_' + \
                         random_player + '_move_' + str(i_rand_move) + '.sgf'
            # save sgf
            save_gamestate_to_sgf(gm,
                                  sgf_path,
                                  file_name,
                                  result=winner_game + ' ' + str(i_rand_move))
            # increment sgf id count
            sgf_id += 1

    # Concatenate training examples
    training_states = convert(states_list, preprocessor)

    # get winners list relative to 'random move' player color (color)
    # winner BLACK & color Black -> WIN
    # winner WHITE & color WHITE -> WIN
    # winner BLACK & color WHITE -> LOSE
    # winner WHITE & color Black -> LOSE
    actual_batch_size = len(states)
    winners = np.array([
        WIN if st.get_winner_color() == color else LOSE for st in states
    ]).reshape(actual_batch_size, 1)
    return training_states, winners