def snapshot_kl_divs(run):
    import pandas as pd
    import torch
    from tqdm.auto import tqdm
    from pavlov import runs, storage
    from boardlaw import hex
    from boardlaw.main import mix

    # Evaluate every snapshot of the run on one shared batch of board states
    m = storage.load_raw(run, 'model')
    worlds = mix(hex.Hex.initial(n_envs=16*1024, boardsize=runs.info(run)['params']['boardsize']))

    logits = {}
    for idx in tqdm(storage.snapshots(run)):
        sd = storage.load_snapshot(run, idx)['agent']
        m.load_state_dict(storage.expand(sd)['network'])
        logits[idx] = m(worlds).logits.detach()

    # Pairwise KL(i || j) = sum_a exp(l_i)*(l_i - l_j), averaged over the batch
    kldivs = {}
    for i in logits:
        for j in logits:
            li, lj = logits[i], logits[j]
            terms = -li.exp().mul(lj - li)
            # Illegal moves carry -inf logits; drop the resulting non-finite terms
            mask = torch.isfinite(terms)
            kldiv = terms.where(mask, torch.zeros_like(terms)).sum(-1)
            kldivs[i, j] = kldiv.mean().item()

    df = pd.Series(kldivs).unstack()
    return df

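# A hedged usage sketch for snapshot_kl_divs: render the snapshot-vs-snapshot KL
# matrix as a heatmap. The seaborn dependency and the plotting choices are
# assumptions for illustration, not part of this module.
def plot_kl_divs(run):
    import seaborn as sns
    kl = snapshot_kl_divs(run)
    # Rows index the reference snapshot i, columns the comparison snapshot j
    ax = sns.heatmap(kl)
    ax.set_xlabel('snapshot j')
    ax.set_ylabel('snapshot i')
    return ax
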
def ablate_run_snapshots(run):
    # Thin out a run's snapshots, keeping only index 0 and power-of-two indices
    import numpy as np
    from tqdm.auto import tqdm
    from pavlov import files, storage

    for i, info in tqdm(storage.snapshots(run).items()):
        if (i == 0) or (np.log2(i) % 1 == 0):
            # Keep snapshots 0, 1, 2, 4, 8, ...
            continue
        print('Removing', run, info['path'].name)
        files.remove(run, info['path'].name)

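# A small, self-contained check of the ablation rule above: an index survives
# iff it is 0 or an exact power of two. Purely illustrative; it touches no stored run.
def test_ablation_rule():
    import numpy as np
    kept = [i for i in range(17) if (i == 0) or (np.log2(i) % 1 == 0)]
    assert kept == [0, 1, 2, 4, 8, 16]
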
def snapshot_data(new_runs):
    import pandas as pd
    from tqdm.auto import tqdm
    from pavlov import storage

    # Gather the sample/flop counters stored alongside each snapshot of each run
    snapshots = {}
    for _, r in tqdm(list(new_runs.iterrows()), desc='snapshots'):
        for i in storage.snapshots(r.run):
            stored = storage.load_snapshot(r.run, i)
            if 'n_samples' in stored:
                snapshots[r.run, i] = {
                    'samples': stored['n_samples'],
                    'flops': stored['n_flops']}

    snapshots = (pd.DataFrame.from_dict(snapshots, orient='index')
                    .rename_axis(index=('run', 'idx'))
                    .reset_index())
    # snapshots['id'] = snapshots.index.to_series()

    return snapshots

def load(run):
    import pandas as pd
    from pavlov import storage

    snapshots = pd.DataFrame.from_dict(storage.snapshots(run), orient='index')

    # Index both the losses and the architecture info by (boardsize, depth, width)
    info, losses = {}, {}
    for i, row in snapshots.iterrows():
        losses[row.boardsize, row.depth, row.width] = storage.load_snapshot(run, i)['losses']
        info[row.boardsize, row.depth, row.width] = {
            'macs': row.n_macs,
            'params': row.n_params}

    losses = pd.DataFrame(losses)
    losses.index.name = 'step'
    losses.columns.names = ('boardsize', 'depth', 'width')

    info = pd.DataFrame(info)
    info.columns.names = ('boardsize', 'depth', 'width')

    return losses, info

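# A hedged usage sketch for load(): plot the loss curves for every (depth, width)
# pair at one board size. The default board size and the log-scale choice are
# assumptions for illustration.
def plot_losses(run, boardsize=9):
    losses, _ = load(run)
    # Selecting the top column level keeps only that board size's (depth, width) columns
    ax = losses[boardsize].plot(logy=True)
    ax.set_xlabel('step')
    ax.set_ylabel('loss')
    return ax
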
def test_evaluator():
    import time
    import pandas as pd
    from collections import defaultdict
    from IPython import display
    from pavlov import runs, storage
    from boardlaw.arena import common

    n_envs_per = 512

    df = runs.pandas(description='cat/nodes')
    names = []
    for r in df.index:
        snaps = storage.snapshots(r)
        for i in snaps:
            names.append((r, i))
    names = names[:12]

    # Assumed conventions, matching test_chunk_evaluator and snapshot_agents:
    # worlds come from the first matching run, agents from (run, snapshot-index) pairs
    worldfunc = lambda n_envs: common.worlds(df.index[0], n_envs, device='cuda')
    agentfunc = lambda name: common.agent(*name, device='cuda')

    games = pd.DataFrame(0, names, names)

    start = time.time()
    results = []
    moves, matches = 0, 0
    for rs, stats in evaluate(worldfunc, agentfunc, games, chunksize=4, n_envs_per=n_envs_per):
        results.extend(rs)
        moves += sum(r.moves for r in rs)
        matches += len(rs)

        duration = time.time() - start
        display.clear_output(wait=True)
        print(f'{moves/duration:.0f} moves/s, {60*matches/duration:.0f} matches/min')

    # Every ordered pair of distinct names should appear exactly once, with n_envs_per games each
    counts = defaultdict(lambda: 0)
    for r in results:
        counts[r.names] += r.games

    assert len(counts) == len(names)*(len(names) - 1)
    assert set(counts.values()) == {n_envs_per}

def snapshot_agents(run, agentfunc, **kwargs):
    import pandas as pd
    from pavlov import storage
    from boardlaw.arena import common

    # Given a list of runs, recurse and merge the per-run agent dicts
    if not isinstance(run, (int, str)):
        agents = {}
        for r in run:
            agents.update(snapshot_agents(r, agentfunc, **kwargs))
        return agents

    period = kwargs.get('period', 1)
    tail = kwargs.get('tail', int(1e6))
    try:
        stored = (pd.DataFrame.from_dict(storage.snapshots(run), orient='index')
                    .tail(tail)
                    .iloc[::period])
    except ValueError:
        # No snapshots stored for this run yet
        return {}
    else:
        agents = {}
        for idx, info in stored.iterrows():
            if idx % period == 0:
                name = pd.Timestamp(info['_created']).strftime(r'%y%m%d-%H%M%S-snapshot')
                agents[name] = common.agent(run, idx, device='cuda')
        return agents

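# A hedged usage sketch for snapshot_agents: load every 4th of the last 20
# snapshots of each run in a list. `runs_of_interest` is a placeholder for a
# list of run names; the period/tail values are illustrative.
def example_snapshot_agents(runs_of_interest):
    from boardlaw.arena import common
    agents = snapshot_agents(runs_of_interest, common.agent, period=4, tail=20)
    print(f'{len(agents)} snapshot agents loaded')
    return agents
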
def test_chunk_evaluator():
    from IPython import display
    from pavlov import runs, storage
    from boardlaw.arena import common

    df = runs.pandas(description='cat/nodes')

    agents = {}
    for r in df.index:
        snaps = storage.snapshots(r)
        for i in snaps:
            agents[f'{r}.{i}'] = common.agent(r, i, 'cuda')
    agents = {k: agents[k] for k in list(agents)[:100]}

    worldfunc = lambda n_envs: common.worlds(df.index[0], n_envs, device='cuda')
    evaluator = ChunkEvaluator(worldfunc, agents, 512)

    results = []
    while not evaluator.finished():
        results.extend(evaluator.step())
        display.clear_output(wait=True)
        evaluator.report()

def adam_over_time(run, B):
    import matplotlib.pyplot as plt
    from tqdm.auto import tqdm
    from pavlov import storage
    from rebar import arrdict

    # Evaluate adam_way at every snapshot of the run and plot how it evolves
    sizes = arrdict.stack([adam_way(run, idx, B) for idx in tqdm(storage.snapshots(run))])
    plt.plot(sizes)