def replay(self, wps, pi_mcts, board_logs, plus_turns, weights, batch_size: int, beta: float) -> None:
    """Train the model on a random mini-batch drawn from the replay buffer.

    Args:
        wps: game outcomes (winner labels), one per stored position.
        pi_mcts: MCTS policy targets, one length-315 vector per position.
        board_logs: stored board states, one per position.
        plus_turns: per-position flags; truthy when it was the "plus"
            player's turn (controls board flipping below).
        weights: per-sample weights applied to the training loss.
        batch_size: number of samples drawn without replacement — callers
            must ensure len(wps) >= batch_size or np.random.choice raises.
        beta: exponent applied element-wise to each MCTS policy
            (sharpens the target for beta > 1, flattens it for beta < 1).
    """
    inputs = np.zeros((batch_size, 7, 5, 3))
    policy_true = np.zeros((batch_size, 315))
    values_true = np.zeros(batch_size)
    input_weights = np.zeros(batch_size)

    # Sample a mini-batch of distinct positions from the whole buffer.
    indices = np.random.choice(np.arange(len(wps)), size=batch_size, replace=False)
    mini_batch = [
        (wps[i], pi_mcts[i], board_logs[i], plus_turns[i], weights[i])
        for i in indices
    ]

    for i, (winner, pi, board, plus_turn, weight) in enumerate(mini_batch):
        gs = GameState()
        gs.board = board
        # Flip the board when it was not the plus player's turn so the
        # network always sees the position from the side to move.
        # NOTE(review): the original comment claimed shape (4, 5, 5), but
        # inputs[i] is (7, 5, 3) per the allocation above — presumably
        # to_inputs matches that; confirm against GameState.to_inputs.
        inputs[i] = gs.to_inputs(flip=not plus_turn)
        policy_true[i] = pi**beta
        values_true[i] = winner
        input_weights[i] = weight

    # epochs=1: one pass over this mini-batch; verbose=0: no progress output.
    self.model.fit(
        inputs,
        [policy_true, values_true],
        sample_weight=input_weights,
        epochs=1,
        verbose=0,
        shuffle=True,
    )