Example #1
 def selfplay_thread():
     nonlocal net, temperature, num_games
     torch.set_num_threads(1)
     with torch.no_grad():
         while True:
             if net.metadata["iteration"] == 1 and options.get(
                     "fast_first_iteration"):
                 game_options = {
                     **options,
                     "zero_value": True,  # ignore value net
                     "num_visits": 10,
                     "detailed_visits_prob": 1.0,
                     "kl_surprise_weights": False,
                 }
                 temp_fn = lambda mv: 1.0
             else:
                 game_options = options
                 temp_fn = temperature
             game_states = play_game(
                 net_evaluator=evaluate,
                 game_class=game_class,
                 temperature=temp_fn,
                 **game_options,
             )
             if game_states:
                 trainer.add_sample.remote(game_states)
             num_games += 1
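
Example #1 only shows the inner worker; below is a minimal sketch of how such nested workers might be started, assuming they live inside a driver function that owns net, temperature and num_games. The thread count and the threading wrapper are assumptions, not shown in the original.

# Hypothetical launcher for the nested worker above (illustration only).
# Plain threads are assumed to be enough because each worker already
# restricts itself to a single torch thread via torch.set_num_threads(1).
import threading

num_selfplay_threads = 4  # assumed value, for illustration
workers = [
    threading.Thread(target=selfplay_thread, daemon=True)
    for _ in range(num_selfplay_threads)
]
for w in workers:
    w.start()
# the enclosing driver would keep running (e.g., reporting num_games)
# while the daemon threads generate games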
Example #2
def selfplay_proc(cpu, game_class, options):
    num_games_before_check = 5
    temperature = lambda mv: 1.0 if mv < 4 else 0.1  # selfplay param
    net = game_class.create_net(cuda=True, **options)
    print(net.device)
    with torch.no_grad():
        for _ in range(5):  # repeat the timing check a few times
            start = time.time()
            samples = 0
            for i in range(num_games_before_check):
                game_states, endstate = play_game(net,
                                                  game_class,
                                                  temperature=temperature,
                                                  **options)
                samples += len(game_states)
            print(
                f"CPU {cpu} self-play generated {samples} samples in {time.time()-start:.1f}s"
            )
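
Example #2 takes a cpu index, which suggests one worker process per core. Here is a minimal launch sketch, assuming torch.multiprocessing with the "spawn" start method and assuming game_class and options are already in scope; the process count is an assumption.

# Hypothetical launcher for selfplay_proc above (not part of the original).
# "spawn" is used so each child process can create its own CUDA context.
import torch.multiprocessing as mp

if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    procs = [
        mp.Process(target=selfplay_proc, args=(cpu, game_class, options))
        for cpu in range(4)  # assumed CPU count
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()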
Example #3
 def selfplay_thread(tid):
     nonlocal games_played
     torch.set_num_threads(1)
     games_played = 0
     with torch.no_grad():
         while True:
             start = time.time()
             samples = 0
             game_states, endstate = play_game(
                 net_evaluator=evaluate,
                 game_class=game_class,
                 temperature=temperature,
                 **options,
             )
             samples += len(game_states)
             dt = time.time() - start
             # games_q.put((game_states, endstate))
             games_played += 1
             print(
                 f"[{games_played}] CPU {cpu} thread {tid} self-play generated {samples} samples (out of {endstate['end_move']} moves) in {dt:.1f}s"
             )
Example #4
def test_play():
    game = GoMokuState
    net = game.create_net()
    play_game(net, game, verbose=True)
Example #5
parser = argparse.ArgumentParser(description="Self-play visualization.")
parser.add_argument("--game", type=str, help="Game to play")
parser.add_argument("--tag", type=str, help="Tag for experiment", default="")
args = parser.parse_args()

game = args.game
if game == "cg":
    game_class = CaptureGoState
elif game == "pxcg":
    game_class = PixelCaptureGoState
elif game == "nim":
    game_class = NimState
elif game == "oth":
    game_class = OthelloState
else:
    raise ValueError(f"unknown game: {game}")

net = game_class.create_net(tag=args.tag)
options = {}

print(f"Loaded net {net.metadata['filename']} on cuda? {net.device}")
temp_fn = lambda mv: 1.0 if mv < 2 else 0.1
with torch.no_grad():
    game_states = play_game(
        net_evaluator=net.evaluate_sample,
        game_class=game_class,
        temperature=temp_fn,
        verbose=True,
        **options,
    )
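
To check the outcome after the verbose playout, the last training sample appears to carry the per-player game value (Example #6 below reads training_samples[-1]["value"]); a small sketch under that assumption:

# Assumption: each sample dict has a per-player "value" entry, as used in Example #6.
if game_states:
    final_value = game_states[-1]["value"]
    print(f"{len(game_states)} positions played; final value: {final_value}")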
Example #6
players = []
for ts in net_timestamps:  # "net_timestamps" is an assumed name; the loading loop is truncated in the original
    try:
        players.append(cls.create_net(net_ts=ts, tag=tag, cuda=False))
    except Exception as e:
        print(e)

print(len(players), "players loaded")

options = {"num_visits": 1, "cpuct": 1.5, "force_win": True}
options = {"num_visits": 1, "cpuct": 1.1}

elocalc = BayesElo(players)

for p1 in tqdm(players, ascii=True):
    for p2 in tqdm(players, ascii=True):
        if p1 is not p2:
            for _ in range(num_games):
                training_samples = play_game(
                    net_evaluator=[p1.evaluate_sample, p2.evaluate_sample],
                    game_class=game_class,
                    temperature=temp_fn,
                    **options,
                )
                # the last sample carries the per-player game value
                v = training_samples[-1]["value"]
                result = v[0] - v[1]
                elocalc.add_result(p1, p2, result)

df, aux = elocalc.summary_df()
pd.set_option("display.max_rows", 500)

df, aux = elocalc.summary_df(aux_zero=True)
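
The pd.set_option("display.max_rows", 500) call above only matters if the summary is printed; a trivial follow-up, assuming summary_df returns a pandas DataFrame:

print(df)  # full rating table; display.max_rows was raised above so all players are shown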