def self_play(tracker_queue, net, replay_queue, probs_queue, loop_count, device="cpu"):
    """Run SELF_PLAY_PERIOD self-play games with one net on both sides.

    Game positions/probabilities are forwarded to replay_queue / probs_queue
    by model.play_game; per-episode speed metrics go to tracker_queue.

    Args:
        tracker_queue: queue receiving ("speed_steps", value, episode_idx).
        net: network used for both players.
        replay_queue, probs_queue: queues passed straight to model.play_game.
        loop_count: outer-loop index, used to derive the global episode id.
        device: torch device string for inference.
    """
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    for j in range(SELF_PLAY_PERIOD):
        i = loop_count * SELF_PLAY_PERIOD + j
        t = time.perf_counter()
        _, steps = model.play_game(
            mcts_stores, replay_queue, probs_queue, net, net,
            steps_before_tau_0=STEPS_BEFORE_TAU_0,
            mcts_searches=MCTS_SEARCHES,
            mcts_batch_size=MCTS_BATCH_SIZE,
            device=device, status="")
        dt = time.perf_counter() - t
        speed_steps = steps / dt
        # Fix: the original also built a long "episode #..." status string that
        # was never used afterwards (dead store) and aliased steps as
        # game_steps; both removed.
        tracker_queue.put(("speed_steps", speed_steps, i))
        print("episode #%d, steps %3d, steps/s %5.2f" % (i, steps, speed_steps))
def play(val, lock, mcts_store, net, best_idx, username, device, step_idx):
    """Worker loop: play `net` against itself until val[0] signals a stop.

    `val` is a shared array guarded by `lock`: val[0] <= 0 requests
    termination, val[1] counts completed games.
    """
    while True:
        started = time.time()
        _, game_steps = model.play_game(
            val, mcts_store, None, net, net,
            steps_before_tau_0=STEPS_BEFORE_TAU_0,
            mcts_searches=MCTS_SEARCHES,
            mcts_batch_size=MCTS_BATCH_SIZE,
            best_idx=best_idx, url=URL, username=username, device=device)
        leaves = len(mcts_store)
        elapsed = time.time() - started
        steps_per_sec = game_steps / elapsed
        leaves_per_sec = leaves / elapsed
        should_stop = False
        with lock:
            # Only count games that actually progressed.
            if game_steps > 0:
                val[1] += 1
            if val[0] <= 0:
                should_stop = True
        if game_steps > 0:
            print(
                "Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d"
                % (step_idx + val[1], game_steps, leaves, steps_per_sec,
                   leaves_per_sec, best_idx))
        if should_stop:
            break
def evaluate(net1, net2, rounds, device="cpu"):
    """Play `rounds` games of net1 vs net2; return net1's decisive-win ratio.

    r > 0.5 counts as a net1 win, r < -0.5 as a net2 win; draws are excluded
    from the denominator.

    Returns:
        n1_wins / (n1_wins + n2_wins), or 0.5 when every game is a draw
        (the original raised ZeroDivisionError in that case).
    """
    n1_win, n2_win = 0, 0
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    for r_idx in range(rounds):
        r, _ = model.play_game(mcts_stores=mcts_stores, replay_buffer=None,
                               net1=net1, net2=net2, steps_before_tau_0=0,
                               mcts_searches=20, mcts_batch_size=16,
                               device=device)
        if r < -0.5:
            n2_win += 1
        elif r > 0.5:
            n1_win += 1
    decisive = n1_win + n2_win
    # All-draw guard: treat as even strength instead of dividing by zero,
    # matching the 0.5 fallback used by the other evaluate() in this codebase.
    return n1_win / decisive if decisive > 0 else 0.5
def eval(val, lock, net1, net2, device, cpuf):
    """Evaluation worker: play net1 vs net2 with randomly swapped sides
    until val[0] signals a stop.

    Tallies (after un-swapping sides) go into the shared array `val`:
    val[1] for net1 wins, val[2] otherwise.

    Args:
        val: shared array; val[0] <= 0 requests termination.
        lock: guards updates to val.
        cpuf: when truthy, move both nets to `device` before playing.
    """
    if cpuf:
        net1.to(device)
        net2.to(device)
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    while True:
        # Randomly swap sides so neither net always moves first.
        are = random.randrange(0, 2)
        r, _ = model.play_game(val, mcts_stores, None,
                               net1=net1 if are < 1 else net2,
                               net2=net2 if are < 1 else net1,
                               steps_before_tau_0=20,
                               mcts_searches=40, mcts_batch_size=40,
                               best_idx=-1, device=device)
        should_stop = False
        with lock:  # was manual acquire()/release()
            if r is not None:  # fix: original compared with `r != None`
                # NOTE(review): any non-win for net1 (including a draw, if the
                # game can produce one) lands in val[2] — confirm r is always
                # decisive here.
                val[1 if (r > 0.5 and are < 1) or (r < -0.5 and are >= 1) else 2] += 1
                print("%d:%d %d/%d" % (are, r, val[1], val[2]), end=' ', flush=True)
                if (val[1] + val[2]) % 5 < 1:
                    print()
            if val[0] <= 0:
                should_stop = True
        if should_stop:
            break
def evaluate(net1, net2, rounds, device="cpu"):
    """Play `rounds` games, swapping which net moves first at the halfway
    point, and return net1's share of decisive games (0.5 if all draws)."""
    wins_net1 = 0
    decisive = 0
    mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    half = rounds // 2
    for round_idx in range(rounds):
        net1_moves_first = round_idx < half
        first = net1 if net1_moves_first else net2
        second = net2 if net1_moves_first else net1
        r, step = model.play_game(None, mcts_stores, None, first, second,
                                  steps_before_tau_0=game.MAX_TURN,
                                  mcts_searches=40, mcts_batch_size=40,
                                  best_idx=-1, device=device)
        # A positive result favors the first mover; map back to net1.
        if (net1_moves_first and r > 0) or (not net1_moves_first and r < 0):
            wins_net1 += 1
        if r != 0:
            decisive += 1
        print(round_idx, r, step)
    if decisive > 0:
        return wins_net1 / decisive
    return 0.5
# --- Training script fragment; `net`, `best_net`, `writer`, `device` and the
# upper-case hyperparameters are defined elsewhere in the file, and the
# `while True:` body continues past this chunk (training code after the
# `continue` guard is not visible here).
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=0.9)
replay_buffer = collections.deque(maxlen=REPLAY_BUFFER)  # bounded experience store
mcts_store = mcts.MCTS()
step_idx = 0
best_idx = 0
with ptan.common.utils.TBMeanTracker(writer, batch_size=10) as tb_tracker:
    while True:
        t = time.time()
        prev_nodes = len(mcts_store)
        game_steps = 0
        # Self-play phase: the current best net plays itself, filling replay_buffer.
        for _ in range(PLAY_EPISODES):
            _, steps = model.play_game(mcts_store, replay_buffer,
                                       best_net.target_model, best_net.target_model,
                                       steps_before_tau_0=STEPS_BEFORE_TAU_0,
                                       mcts_searches=MCTS_SEARCHES,
                                       mcts_batch_size=MCTS_BATCH_SIZE,
                                       device=device)
            game_steps += steps
        # The MCTS store only grows, so this delta is the leaves expanded this batch.
        game_nodes = len(mcts_store) - prev_nodes
        dt = time.time() - t
        speed_steps = game_steps / dt
        speed_nodes = game_nodes / dt
        tb_tracker.track("speed_steps", speed_steps, step_idx)
        tb_tracker.track("speed_nodes", speed_nodes, step_idx)
        print("Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d, replay %d" % (
            step_idx, game_steps, game_nodes, speed_steps, speed_nodes, best_idx, len(replay_buffer)))
        step_idx += 1
        # Skip training until enough samples have accumulated.
        if len(replay_buffer) < MIN_REPLAY_TO_TRAIN:
            continue
# --- Tournament script fragment: `nets.append(...)` is the tail of a
# checkpoint-loading loop that starts outside this chunk.
nets.append((fname, net))
total_agent = {}
total_pairs = {}
# Round-robin: every ordered pair of distinct nets plays args.rounds games.
for idx1, n1 in enumerate(nets):
    for idx2, n2 in enumerate(nets):
        if idx1 == idx2:
            continue
        wins, losses, draws = 0, 0, 0
        ts = time.time()
        for _ in range(args.rounds):
            r, _ = model.play_game(mcts_stores=None, replay_buffer=None,
                                   net1=n1[1], net2=n2[1], steps_before_tau_0=0,
                                   mcts_searches=MCTS_SEARCHES,
                                   mcts_batch_size=MCTS_BATCH_SIZE,
                                   device=device)
            # r > 0.5: n1 won, r < -0.5: n2 won, anything between is a draw.
            if r > 0.5:
                wins += 1
            elif r < -0.5:
                losses += 1
            else:
                draws += 1
        speed_games = args.rounds / (time.time() - ts)
        name_1, name_2 = n1[0], n2[0]
        # Results to stdout, timing to stderr so result logs stay clean.
        print("%s vs %s -> w=%d, l=%d, d=%d" % (name_1, name_2, wins, losses, draws))
        sys.stderr.write("Speed %.2f games/s\n" % speed_games)
        sys.stdout.flush()
# --- Worker-setup script fragment: the net-construction call is cut at the
# start of this chunk (`actions_n=...` is its trailing argument).
                  actions_n=actionTable.AllMoveLength).to(device)
net.load_state_dict(checkpoint['model'], strict=False)
net.eval()           # inference only; no gradient updates in this fragment
net.share_memory()   # let child processes reuse the same weight tensors
# NOTE(review): indentation below is inferred — the original chunk carries no
# structure. Presumably on Windows with CUDA the games run in-process
# (no fork-based workers); confirm against the full script.
if os.name == 'nt' and args.cuda:
    mcts_store = mcts.MCTS()
    for i in range(PLAY_EPISODE):
        t = time.time()
        _, game_steps = model.play_game(
            None, mcts_store, None, net, net,
            steps_before_tau_0=STEPS_BEFORE_TAU_0,
            mcts_searches=MCTS_SEARCHES,
            mcts_batch_size=MCTS_BATCH_SIZE,
            best_idx=best_idx, url=URL, username=username, device=device)
        game_nodes = len(mcts_store)
        dt = time.time() - t
        speed_steps = game_steps / dt
        speed_nodes = game_nodes / dt
        step_idx += 1
        print(
            "Step %d, steps %3d, leaves %4d, steps/s %5.2f, leaves/s %6.2f, best_idx %d"
            % (step_idx, game_steps, game_nodes, speed_steps, speed_nodes, best_idx))
# --- Tournament script fragment: `nets`, `args`, `device` and game/model come
# from outside this chunk; total_agent/total_pairs are filled past it.
total_agent = {}
total_pairs = {}
# Round-robin: every ordered pair of distinct nets plays args.rounds games.
for idx1, n1 in enumerate(nets):
    for idx2, n2 in enumerate(nets):
        if idx1 == idx2:
            continue
        wins, losses, draws = 0, 0, 0
        ts = time.time()
        for _ in range(args.rounds):
            r, _ = model.play_game(None, None, queue=None,
                                   net1=n1[1], net2=n2[1],
                                   steps_before_tau_0=game.MAX_TURN,
                                   mcts_searches=MCTS_SEARCHES,
                                   mcts_batch_size=MCTS_BATCH_SIZE,
                                   best_idx=-1, device=device)
            print(r)
            # r > 0.5: n1 won, r < -0.5: n2 won, anything between is a draw.
            if r > 0.5:
                wins += 1
            elif r < -0.5:
                losses += 1
            else:
                draws += 1
        speed_games = args.rounds / (time.time() - ts)
        name_1, name_2 = n1[0], n2[0]
        print("%s vs %s -> w=%d, l=%d, d=%d" % (name_1, name_2, wins, losses, draws))
# --- Tournament script fragment: loads one checkpoint (the enclosing loop
# over `fname` values starts outside this chunk) and runs a round-robin.
net = model.Net(model.OBS_SHAPE, game.GAME_COLS)
# map_location keeps GPU-trained checkpoints loadable on CPU-only machines.
net.load_state_dict(torch.load(fname, map_location=lambda storage, loc: storage))
net = net.to(device)
nets.append((fname, net))
total_agent = {}
total_pairs = {}
for idx1, n1 in enumerate(nets):
    for idx2, n2 in enumerate(nets):
        if idx1 == idx2:
            continue
        wins, losses, draws = 0, 0, 0
        ts = time.time()
        for _ in range(args.rounds):
            r, _ = model.play_game(mcts_stores=None, replay_buffer=None,
                                   net1=n1[1], net2=n2[1], steps_before_tau_0=0,
                                   mcts_searches=MCTS_SEARCHES,
                                   mcts_batch_size=MCTS_BATCH_SIZE,
                                   device=device)
            # r > 0.5: n1 won, r < -0.5: n2 won, anything between is a draw.
            if r > 0.5:
                wins += 1
            elif r < -0.5:
                losses += 1
            else:
                draws += 1
        speed_games = args.rounds / (time.time() - ts)
        name_1, name_2 = n1[0], n2[0]
        print("%s vs %s -> w=%d, l=%d, d=%d" % (name_1, name_2, wins, losses, draws))
        sys.stderr.write("Speed %.2f games/s\n" % speed_games)
        sys.stdout.flush()
        # Accumulate per-agent totals (wins/losses mirrored for the opponent)
        # and per-pair totals.
        game.update_counts(total_agent, name_1, (wins, losses, draws))
        game.update_counts(total_agent, name_2, (losses, wins, draws))
        game.update_counts(total_pairs, (name_1, name_2), (wins, losses, draws))