def _start_learning(config, end_stats=None, checkpoint=None):
    """Spawn the worker processes and run the learner until it stops.

    Writes `config` to `<config.path>/config.json`, starts `config.actors`
    actor processes and `config.evaluators` evaluator processes, then runs
    `learner` in the calling process. When the learner returns, raises, or is
    interrupted, every worker is sent the message `""` and joined.

    Args:
      config: Run configuration. The fields read here are `path`, `game`,
        `actors`, `evaluators`, `nn_model`, `nn_width` and `nn_depth`; it
        must also provide `_asdict()`.
      end_stats: Forwarded unchanged to `learner`.
      checkpoint: Forwarded unchanged to `learner` and to every worker.
    """
    print("Writing logs and checkpoints to:", config.path)
    print("Model type: %s(%s, %s)" % (config.nn_model, config.nn_width,
                                      config.nn_depth))

    with open(os.path.join(config.path, "config.json"), "w") as fp:
        fp.write(json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n")

    game = pyspiel.load_game(config.game)
    actors = [
        spawn.Process(
            actor,
            kwargs={
                "game": game,
                "config": config,
                "num": i,
                "checkpoint": checkpoint,
            })
        for i in range(config.actors)
    ]
    evaluators = [
        spawn.Process(
            evaluator,
            kwargs={
                "game": game,
                "config": config,
                "num": i,
                "checkpoint": checkpoint,
            })
        for i in range(config.evaluators)
    ]

    def broadcast(msg):
        """Put `msg` on the queue of every actor and evaluator."""
        for proc in actors + evaluators:
            proc.queue.put(msg)

    # How long each join attempt waits before the actor queue is drained again.
    join_wait_delay = 0.001

    try:
        learner(  # pylint: disable=missing-kwoa
            game=game,
            config=config,
            actors=actors,
            evaluators=evaluators,
            broadcast_fn=broadcast,
            end_stats=end_stats,
            checkpoint=checkpoint)
    except (KeyboardInterrupt, EOFError):
        print("Caught a KeyboardInterrupt, stopping early.")
    finally:
        # Presumably "" is what the workers treat as the request to stop.
        broadcast("")
        # A process that still has items buffered on a multiprocessing queue
        # may not terminate, so a plain join() on an actor can block forever
        # once the learner has stopped reading. Keep emptying each actor's
        # queue until the process has actually exited.
        for proc in actors:
            while proc.exitcode is None:
                while not proc.queue.empty():
                    proc.queue.get_nowait()
                proc.join(join_wait_delay)
        for proc in evaluators:
            proc.join()
def test_spawn_works(self):
    """Round-trips work units through several spawned worker processes.

    Every worker first reports its id, then echoes `(worker_id, value)` for
    each value it receives after a short random sleep, and stops when it
    receives `None`. The test checks that every unit comes back within a
    time budget and that the results of different workers are interleaved.
    """
    max_sleep_time = 0.01  # 10ms

    def worker_fn(worker_id, queue):
        queue.put(worker_id)  # Show it's up and running.
        random.seed(time.time() + worker_id)
        # Keep echoing until the None sentinel arrives.
        for value in iter(queue.get, None):
            time.sleep(max_sleep_time * random.random())
            queue.put((worker_id, value))

    num_workers = 5
    workers = [
        spawn.Process(worker_fn, kwargs={"worker_id": wid})
        for wid in range(num_workers)
    ]

    # Make sure they're warmed up: each one announces itself with its id.
    for wid, worker in enumerate(workers):
        self.assertEqual(wid, worker.queue.get())

    num_work_units = 40
    expected_output = []
    for wid, worker in enumerate(workers):
        for unit in range(num_work_units):
            worker.queue.put(unit)
            expected_output.append((wid, unit))
        worker.queue.put(None)

    total_expected = len(expected_output)
    max_total_time = max_sleep_time * num_work_units
    start_time = time.time()
    output = []
    i = 0
    while len(output) < total_expected:
        # Poll every worker once per pass without blocking on any of them.
        for worker in workers:
            try:
                item = worker.queue.get_nowait()
            except spawn.Empty:
                continue
            output.append(item)

        time.sleep(0.001)
        i += 1
        self.assertLess(
            time.time() - start_time,
            20 * max_sleep_time * num_work_units,
            msg=f"Don't wait forever. Loop {i}, found {len(output)}")

    time_taken = time.time() - start_time
    print("Finished in {:.3f}s, {:.2f}x the max".format(
        time_taken, time_taken / max_total_time))

    for worker in workers:
        worker.join()

    # All messages arrived
    self.assertLen(output, total_expected)
    self.assertCountEqual(output, expected_output)

    # The messages arrived out of order, showing parallelism.
    self.assertNotEqual(output, expected_output)
def alpha_zero(config: Config):
    """Start all the worker processes for a full alphazero setup.

    Loads `config.game`, checks that it is a 2-player, terminal-reward,
    sequential, deterministic game, prepares the output directory (a fresh
    temporary one when `config.path` is empty), writes `config.json` there,
    spawns the actors and evaluators and runs the learner. On the way out
    every worker is sent `""` and joined.
    """
    game = pyspiel.load_game(config.game)
    config = config._replace(
        observation_shape=game.observation_tensor_shape(),
        output_size=game.num_distinct_actions())

    print("Starting game", config.game)
    if game.num_players() != 2:
        sys.exit("AlphaZero can only handle 2-player games.")

    game_type = game.get_type()
    if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL:
        raise ValueError("Game must have terminal rewards.")
    if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL:
        raise ValueError("Game must have sequential turns.")
    if game_type.chance_mode != pyspiel.GameType.ChanceMode.DETERMINISTIC:
        raise ValueError("Game must be deterministic.")

    run_dir = config.path
    if not run_dir:
        # No directory requested: create a timestamped temporary one and
        # record it in the config so the workers see the same location.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
        run_dir = tempfile.mkdtemp(
            prefix="az-{}-{}-".format(timestamp, config.game))
        config = config._replace(path=run_dir)

    if not os.path.exists(run_dir):
        os.makedirs(run_dir)
    if not os.path.isdir(run_dir):
        sys.exit("{} isn't a directory".format(run_dir))

    print("Writing logs and checkpoints to:", run_dir)
    print("Model type: %s(%s, %s)" % (config.nn_model, config.nn_width,
                                      config.nn_depth))

    config_text = json.dumps(config._asdict(), indent=2, sort_keys=True)
    with open(os.path.join(config.path, "config.json"), "w") as config_file:
        config_file.write(config_text + "\n")

    def spawn_workers(target, count):
        """Start `count` processes running `target`, numbered from 0."""
        return [
            spawn.Process(
                target, kwargs={"game": game, "config": config, "num": num})
            for num in range(count)
        ]

    actors = spawn_workers(actor, config.actors)
    evaluators = spawn_workers(evaluator, config.evaluators)

    def broadcast(msg):
        """Put `msg` on the queue of every actor and evaluator."""
        for worker in actors + evaluators:
            worker.queue.put(msg)

    try:
        learner(  # pylint: disable=missing-kwoa
            game=game,
            config=config,
            actors=actors,
            evaluators=evaluators,
            broadcast_fn=broadcast)
    except (KeyboardInterrupt, EOFError):
        print("Caught a KeyboardInterrupt, stopping early.")
    finally:
        broadcast("")
        # for actor processes to join we have to make sure that their q_in is
        # empty, including backed up items
        for worker in actors:
            while worker.exitcode is None:
                while not worker.queue.empty():
                    worker.queue.get_nowait()
                worker.join(JOIN_WAIT_DELAY)
        for worker in evaluators:
            worker.join()
def inner_one(config, model_num):
    """Validate the game, prepare the run directory and spawn the workers.

    Loads `config.game`, checks that it is a 2-player, terminal-reward,
    sequential, deterministic game, makes sure the output directory exists
    (creating a temporary one when `config.path` is empty) and writes the
    config there as `config_1.json` when `model_num == 1`, otherwise as
    `config_2.json`.

    Args:
      config: Run configuration; replaced internally with a copy carrying
        the observation shape, output size and, if needed, the new path.
      model_num: Selects the name of the config file that is written.

    Returns:
      A `(game, config, actors, evaluators)` tuple with the loaded game, the
      updated config and the two lists of spawned processes.
    """
    game = pyspiel.load_game(config.game)
    config = config._replace(
        observation_shape=game.observation_tensor_shape(),
        output_size=game.num_distinct_actions())

    print("Starting game", config.game)
    if game.num_players() != 2:
        sys.exit("AlphaZero can only handle 2-player games.")

    game_type = game.get_type()
    if game_type.reward_model != pyspiel.GameType.RewardModel.TERMINAL:
        raise ValueError("Game must have terminal rewards.")
    if game_type.dynamics != pyspiel.GameType.Dynamics.SEQUENTIAL:
        raise ValueError("Game must have sequential turns.")
    if game_type.chance_mode != pyspiel.GameType.ChanceMode.DETERMINISTIC:
        raise ValueError("Game must be deterministic.")

    run_dir = config.path
    if not run_dir:
        # No directory requested: create a timestamped temporary one and
        # record it in the config that is returned to the caller.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
        run_dir = tempfile.mkdtemp(
            prefix="az-{}-{}-".format(timestamp, config.game))
        config = config._replace(path=run_dir)

    if not os.path.exists(run_dir):
        os.makedirs(run_dir)
    if not os.path.isdir(run_dir):
        sys.exit("{} isn't a directory".format(run_dir))

    print("Writing logs and checkpoints to:", run_dir)
    print("Model type: %s(%s, %s)" % (config.nn_model, config.nn_width,
                                      config.nn_depth))

    # Both models write the same content; only the file name differs.
    config_name = "config_1.json" if model_num == 1 else "config_2.json"
    with open(os.path.join(config.path, config_name), "w") as config_file:
        config_file.write(
            json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n")

    def spawn_workers(target, count):
        """Start `count` processes running `target`, numbered from 0."""
        return [
            spawn.Process(
                target, kwargs={"game": game, "config": config, "num": num})
            for num in range(count)
        ]

    actors = spawn_workers(actor, config.actors)
    evaluators = spawn_workers(evaluator, config.evaluators)
    return game, config, actors, evaluators
def from_checkpoint(path, checkpoint, steps):
    """Continue AlphaZero training from a saved checkpoint.

    Continues training the model stored in `path` from checkpoint
    `checkpoint` for `steps` further training steps.

    Args:
      path: Directory holding `config.json`, `learner.jsonl` and the
        `checkpoint-<checkpoint>.*` files of a previous run.
      checkpoint: Checkpoint number to resume from; use -1 for the most
        recent version of the model.
      steps: Number of additional training steps. `max_steps` is set to the
        end step recorded for the checkpoint plus this value.

    Exits through `sys.exit` when `path` is not a directory, when one of the
    needed files is missing, or when `learner.jsonl` has no line for
    `checkpoint`.
    """
    needed_files = [
        "config.json",
        "learner.jsonl",
        "checkpoint-{}.data-00000-of-00001".format(checkpoint),
        "checkpoint-{}.index".format(checkpoint),
        "checkpoint-{}.meta".format(checkpoint),
    ]

    # Ensure the path is correct and all the files exist.
    if not os.path.isdir(path):
        sys.exit("{} is not a directory.".format(path))
    for name in needed_files:
        if not os.path.exists(os.path.join(path, name)):
            sys.exit("Could not find {} in {}.".format(name, path))

    # Read the config json into a dictionary.
    with open(os.path.join(path, "config.json"), "r") as f:
        config_json = json.load(f)

    # Read each line of the learner json into a list.
    with open(os.path.join(path, "learner.jsonl"), "r") as f:
        learner_json_lines = f.readlines()

    # Get the line we are interested in (specified by the checkpoint): line
    # `checkpoint - 1`, or the last line for -1.
    # NOTE(review): checkpoint 0 also maps to index -1, i.e. the last line —
    # confirm that this is intended.
    learner_end_index = checkpoint if checkpoint == -1 else checkpoint - 1
    try:
        learner_end_line = learner_json_lines[learner_end_index]
    except IndexError:
        # Fail with a readable message, like the other input checks above,
        # instead of a bare IndexError.
        sys.exit("learner.jsonl in {} has no entry for checkpoint {}.".format(
            path, checkpoint))
    learner_json_end = json.loads(learner_end_line)

    # Retrieve the statistics and data we need.
    end_stats = _end_stats_from_json(learner_json_end)
    config_json["max_steps"] = end_stats.end_step + steps
    config = _config_from_json(config_json)

    # From here on files are written to `config.path` (the path stored in
    # config.json), not to the `path` argument.
    print("Writing logs and checkpoints to:", config.path)
    print("Model type: %s(%s, %s)" % (config.nn_model, config.nn_width,
                                      config.nn_depth))

    # Save the updated config file.
    with open(os.path.join(config.path, "config.json"), "w") as fp:
        fp.write(json.dumps(config._asdict(), indent=2, sort_keys=True) + "\n")

    # Save the updated learner file if needed: when resuming from an explicit
    # checkpoint, keep only the first `checkpoint` log lines.
    if checkpoint != -1:
        _delete_newer_checkpoints(config.path, checkpoint)
        with open(os.path.join(config.path, "learner.jsonl"), "w") as fp:
            fp.write("".join(learner_json_lines[:checkpoint]))

    game = pyspiel.load_game(config.game)
    actors = [
        spawn.Process(
            actor,
            kwargs={
                "game": game,
                "config": config,
                "num": i,
                "checkpoint": checkpoint,
            })
        for i in range(config.actors)
    ]
    evaluators = [
        spawn.Process(
            evaluator,
            kwargs={
                "game": game,
                "config": config,
                "num": i,
                "checkpoint": checkpoint,
            })
        for i in range(config.evaluators)
    ]

    def broadcast(msg):
        """Put `msg` on the queue of every actor and evaluator."""
        for proc in actors + evaluators:
            proc.queue.put(msg)

    # How long each join attempt waits before the actor queue is drained again.
    join_wait_delay = 0.001

    try:
        learner(  # pylint: disable=missing-kwoa
            game=game,
            config=config,
            actors=actors,
            evaluators=evaluators,
            broadcast_fn=broadcast,
            end_stats=end_stats,
            checkpoint=checkpoint)
    except (KeyboardInterrupt, EOFError):
        print("Caught a KeyboardInterrupt, stopping early.")
    finally:
        # Presumably "" is what the workers treat as the request to stop.
        broadcast("")
        # A process that still has items buffered on a multiprocessing queue
        # may not terminate, so a plain join() on an actor can block forever
        # once the learner has stopped reading. Keep emptying each actor's
        # queue until the process has actually exited.
        for proc in actors:
            while proc.exitcode is None:
                while not proc.queue.empty():
                    proc.queue.get_nowait()
                proc.join(join_wait_delay)
        for proc in evaluators:
            proc.join()