def _worker_run(games_idxs):
    """Play a batch of self-play games in this worker and store the samples.

    A new ``SelfPlay`` is built from the module-level ``_env_`` state and
    stored on it as ``_env_.sp``. The games identified by ``games_idxs`` are
    played to completion on the current asyncio event loop, and the resulting
    dataset is written to the ``"fresh"`` table of ``_env_.hdf_file_name``
    while holding ``_env_.hdf_lock``.

    Args:
        games_idxs: Game indices, forwarded unchanged to
            ``SelfPlay.play_games``. Locally only ``len(games_idxs)`` is used,
            for the summary log line.

    Raises:
        Exception: Any error raised while setting up or playing the games is
            printed and then re-raised with its original traceback.
    """
    # Function-scope imports, kept in their original order.
    # NOTE(review): presumably deferred so each worker process imports them
    # itself -- confirm before hoisting to module level.
    import self_play
    from dots_boxes.dots_boxes_game import BoxesState
    from utils.utils import write_to_hdf
    import time

    loop = asyncio.get_event_loop()

    # perf_counter() is monotonic, so the reported durations cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()
    try:
        _env_.sp = self_play.SelfPlay(_env_.nnet, _env_.params)
        _env_.sp.set_player_change_callback(_player_change_callback)
        loop.run_until_complete(
            _env_.sp.play_games(BoxesState(), games_idxs, show_progress=False)
        )
    except Exception as e:
        # Flush right away so the message is visible even if the re-raised
        # exception is not reported by the caller.
        print(e, flush=True)
        raise
    played = time.perf_counter()

    # The second argument mirrors the branch below: it is True exactly when
    # models are not being compared.
    df = _env_.sp.get_datasets(_env_.generations, not _env_.compare_models)
    if not _env_.compare_models:
        # Regular runs get a zero-filled int8 "training" column.
        df["training"] = np.zeros(len(df.index), dtype=np.int8)

    with _env_.hdf_lock:
        write_to_hdf(_env_.hdf_file_name, "fresh", df)
    saved = time.perf_counter()

    logger.warning(
        "Worker %s played %d games (%d samples) in %.0fs (save=%.3fs)",
        _env_.name,
        len(games_idxs),
        len(df.index),
        saved - started,
        saved - played,
    )
def load_boards_samples():
    """Load the sample boards CSV and replay each row's moves into a game.

    Reads ``test/test_boards.csv`` (``;``-separated, lines starting with
    ``#`` ignored, indexed by the ``id`` column). Every string cell is parsed
    as space-separated integers into a list of ints; other cells are left
    untouched. For each row a fresh ``BoxesState`` is created and the row's
    ``moves`` are played on it in order.

    Returns:
        The parsed DataFrame with an additional ``game`` column holding the
        ``BoxesState`` obtained for each row.
    """

    def _parse_cell(cell):
        # Non-string cells are returned unchanged.
        if isinstance(cell, str):
            return list(map(int, cell.split(" ")))
        return cell

    df = pd.read_csv("test/test_boards.csv", comment="#", sep=";", index_col="id")

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; check the class (not the instance) so a column that
    # happens to be called "map" cannot be mistaken for the method.
    if hasattr(pd.DataFrame, "map"):
        df = df.map(_parse_cell)
    else:
        df = df.applymap(_parse_cell)

    games = []
    for moves in df["moves"]:
        game = BoxesState()
        for move in moves:
            game.play_(int(move))
        games.append(game)

    df["game"] = games
    return df
def test_moves_to_string(self):
    """Compare moves_to_string output with a fixed expected rendering.

    The input is the first ten and the last ten entries returned by
    ``get_valid_moves(True)`` on a new ``BoxesState``, combined with ``+``.
    """
    # The comparison is exact, so any whitespace change inside this literal
    # changes the outcome of the test.
    expected = """------------------------------ Player = 0 Next player = 1 Boxes to close = [1.5, 2.5] Result = None +---+---+---+ | 1 | +---+---+---+ | 0 | 0 | 0 | +---+---+---+ | 1 | | | +---+ + + """
    fresh_game = BoxesState()
    valid_moves = fresh_game.get_valid_moves(True)
    selection = valid_moves[:10] + valid_moves[-10:]
    rendered = moves_to_string(selection)
    self.assertEqual(rendered, expected)
def game_to_tikz(moves, next_move, probs=(), bw=False, dims=(3, 3)):
    """Render a played position and its candidate next move(s) as TikZ source.

    The moves are replayed on a fresh ``BoxesState`` (with ``to_play`` forced
    to 1) and drawn one by one; the last move is drawn with
    ``"line width=1.0"`` and boxes reported as closed by ``play_`` are filled
    for the player who just moved.

    Args:
        moves: Sequence of moves to replay and draw, in order.
        next_move: When ``probs`` has two or more entries, a container of
            move indices (tested with ``in``) that are drawn in the colour of
            the player to move. Otherwise a single move, passed directly to
            ``draw_move`` and labelled with a multiplication sign.
        probs: Per-move probabilities indexed by move. Moves whose rounded
            probability exceeds 0.05, or that are in ``next_move``, are drawn
            with their probability as label.
        bw: Forwarded to ``BoxesGameTikz`` as its third positional argument.
        dims: Board dimensions, unpacked into ``BoxesGameTikz`` and forwarded
            to ``BoxesState.init_static_fields``.

    Returns:
        A string made of ``str(state)`` followed by ``probs=...`` and
        ``next_move=...`` lines, every line prefixed with ``"% "`` (a TeX
        comment), then a newline and the output of ``tikz.make()``.
    """
    # NOTE(review): dims is passed positionally, wrapped in a 1-tuple --
    # confirm this matches the signature of init_static_fields.
    BoxesState.init_static_fields((dims, ))
    bs = BoxesState()
    bs.to_play = 1
    tikz = BoxesGameTikz(*dims, bw)

    for m in moves:
        # Only the most recent move gets the thicker line style.
        tikz.draw_move(m, style="" if m != moves[-1] else "line width=1.0")
        closed = bs.play_(m)
        if closed:
            tikz.fill_boxes(bs.just_played, *closed)

    probs = np.asarray(probs)
    # Ascending order: the most probable moves are drawn last.
    order = probs.argsort()

    labels = []
    # argsort() yields a permutation of the indices, so .any() is true exactly
    # when there are at least two probabilities.
    # NOTE(review): a single-entry probs therefore takes the else branch --
    # confirm that this is intended.
    if order.any():
        probs = probs.round(2)
        for i in order:
            is_next = i in next_move
            if probs[i] > 0.05 or is_next:
                # Compact label: "0.50" -> ".5", "1.00" -> "1."
                p = f"{probs[i]:.2f}".lstrip('0').rstrip('0')
                if probs[i] == 0:
                    # Stripping would leave a bare "." for a zero probability.
                    p = '.0'
                labels.append(f"{i}->{p};")
                tikz.draw_move(
                    i,
                    bs.to_play if is_next else None,
                    p,
                    "" if is_next else "gray",
                )
    else:
        tikz.draw_move(next_move, bs.to_play, "$\\times$")
    ps = "".join(labels)

    s = str(bs)
    s += f"probs={ps}\nnext_move={next_move}"
    # Turn the textual summary into TeX comments placed before the picture.
    s = "\n".join("% " + line for line in s.split("\n"))
    return s + "\n" + tikz.make()
def test_repr(self):
    """Compare ``str(state)`` with a fixed rendering after two batches of moves.

    First the ten leading valid moves of a new ``BoxesState`` are played, then
    the ten trailing valid moves of the resulting position.
    """
    # The comparison is exact, so any whitespace change inside this literal
    # changes the outcome of the test.
    expected = """------------------------------ Player = 0 Next player = 1 Boxes to close = [1.5, 2.5] Result = None +---+---+---+ | | +---+---+---+ | | | | +---+---+---+ | | | | +---+ + + """
    board = BoxesState()

    leading = board.get_valid_moves(as_indices=True)[:10]
    for move in leading:
        board.play_(move)

    # Queried again on purpose: the second batch comes from the position
    # reached after the first batch was played.
    trailing = board.get_valid_moves(as_indices=True)[-10:]
    for move in trailing:
        board.play_(move)

    self.assertEqual(str(board), expected)
# NOTE(review): presumably the PUCT exploration constants used by MCTS --
# confirm where this pair is consumed.
CPUCT = (1.25, 19652)

# `params` is another name for the PARAMS object, not a copy.
# NOTE(review): rewrite_str's return value is discarded, so it presumably
# edits the parameters in place (and thus PARAMS as well) -- confirm.
params = PARAMS
params.rewrite_str("data/", f"../data/")
params.rewrite_str("_exp_", RUN)
params.self_play.pytorch_devices = "cuda:0"

# Sample boards: ';'-separated CSV, '#' comment lines skipped, indexed by "id".
SAMPLES = pd.read_csv("../test/test_boards.csv", comment="#", sep=";", index_col="id")
# Every string cell becomes a list of ints (split on single spaces); cells of
# any other type are kept as they are.
SAMPLES = SAMPLES.applymap(lambda s: list(map(int, s.split(" "))) if isinstance(s, str) else s)

# Replay each row's "moves" on a fresh BoxesState and keep the resulting state
# in a new "game" column.
games = []
for idx, sample in SAMPLES.iterrows():
    g = BoxesState()
    for m in sample.moves:
        g.play_(int(m))
    games.append(g)
SAMPLES["game"] = games

async def test_mcts_nn(loop, generation):
    sp_params = params.self_play
    model = params.nn.model_class(params)
    model.load_parameters(generation, to_device=params.self_play.pytorch_devices)
    nn_wrapper = NeuralNetWrapper(model, params)
    nnet = AsyncBatchedProxy(nn_wrapper,
def setUp(self):
    """Initialise the static fields of ``BoxesState`` with ``dims=(3, 3)``."""
    board_dims = (3, 3)
    BoxesState.init_static_fields(dims=board_dims)
def test_hash(self):
    """A state equals its deep copy and differs from the result of ``play(0)``.

    NOTE(review): despite the name, only ``==`` / ``!=`` are exercised here;
    ``hash()`` is never called.
    """
    initial = BoxesState()
    after_move = initial.play(0)
    duplicate = copy.deepcopy(initial)
    self.assertEqual(initial, duplicate)
    self.assertNotEqual(initial, after_move)